// dotenvy/parse.rs

1use std::collections::HashMap;
2use std::env;
3
4use crate::errors::*;
5
6// for readability's sake
7pub type ParsedLine = Result<Option<(String, String)>>;
8
9pub fn parse_line(
10    line: &str,
11    substitution_data: &mut HashMap<String, Option<String>>,
12) -> ParsedLine {
13    let mut parser = LineParser::new(line, substitution_data);
14    parser.parse_line()
15}
16
/// Cursor over one line while it is being parsed.
struct LineParser<'a> {
    /// The line exactly as it was handed to the parser; reported back in errors.
    original_line: &'a str,
    /// Values of previously parsed keys, read and updated while parsing.
    substitution_data: &'a mut HashMap<String, Option<String>>,
    /// The part of the line that has not been consumed yet.
    line: &'a str,
    /// Number of bytes consumed so far, i.e. the byte offset of `line`.
    pos: usize,
}
23
24impl<'a> LineParser<'a> {
25    fn new(
26        line: &'a str,
27        substitution_data: &'a mut HashMap<String, Option<String>>,
28    ) -> LineParser<'a> {
29        LineParser {
30            original_line: line,
31            substitution_data,
32            line: line.trim_end(), // we don’t want trailing whitespace
33            pos: 0,
34        }
35    }
36
37    fn err(&self) -> Error {
38        Error::LineParse(self.original_line.into(), self.pos)
39    }
40
41    fn parse_line(&mut self) -> ParsedLine {
42        self.skip_whitespace();
43        // if its an empty line or a comment, skip it
44        if self.line.is_empty() || self.line.starts_with('#') {
45            return Ok(None);
46        }
47
48        let mut key = self.parse_key()?;
49        self.skip_whitespace();
50
51        // export can be either an optional prefix or a key itself
52        if key == "export" {
53            // here we check for an optional `=`, below we throw directly when it’s not found.
54            if self.expect_equal().is_err() {
55                key = self.parse_key()?;
56                self.skip_whitespace();
57                self.expect_equal()?;
58            }
59        } else {
60            self.expect_equal()?;
61        }
62        self.skip_whitespace();
63
64        if self.line.is_empty() || self.line.starts_with('#') {
65            self.substitution_data.insert(key.clone(), None);
66            return Ok(Some((key, String::new())));
67        }
68
69        let parsed_value = parse_value(self.line, self.substitution_data)?;
70        self.substitution_data
71            .insert(key.clone(), Some(parsed_value.clone()));
72
73        Ok(Some((key, parsed_value)))
74    }
75
76    fn parse_key(&mut self) -> Result<String> {
77        if !self
78            .line
79            .starts_with(|c: char| c.is_ascii_alphabetic() || c == '_')
80        {
81            return Err(self.err());
82        }
83        let index = match self
84            .line
85            .find(|c: char| !(c.is_ascii_alphanumeric() || c == '_' || c == '.'))
86        {
87            Some(index) => index,
88            None => self.line.len(),
89        };
90        self.pos += index;
91        let key = String::from(&self.line[..index]);
92        self.line = &self.line[index..];
93        Ok(key)
94    }
95
96    fn expect_equal(&mut self) -> Result<()> {
97        if !self.line.starts_with('=') {
98            return Err(self.err());
99        }
100        self.line = &self.line[1..];
101        self.pos += 1;
102        Ok(())
103    }
104
105    fn skip_whitespace(&mut self) {
106        if let Some(index) = self.line.find(|c: char| !c.is_whitespace()) {
107            self.pos += index;
108            self.line = &self.line[index..];
109        } else {
110            self.pos += self.line.len();
111            self.line = "";
112        }
113    }
114}
115
/// Tracks whether the value parser is currently reading a variable reference.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SubstitutionMode {
    /// Not inside a variable reference.
    None,
    /// Reading a bare reference that started with `$`.
    Block,
    /// Reading a braced reference that started with `${` and ends at `}`.
    EscapedBlock,
}
122
123fn parse_value(
124    input: &str,
125    substitution_data: &mut HashMap<String, Option<String>>,
126) -> Result<String> {
127    let mut strong_quote = false; // '
128    let mut weak_quote = false; // "
129    let mut escaped = false;
130    let mut expecting_end = false;
131
132    //FIXME can this be done without yet another allocation per line?
133    let mut output = String::new();
134
135    let mut substitution_mode = SubstitutionMode::None;
136    let mut substitution_name = String::new();
137
138    for (index, c) in input.chars().enumerate() {
139        //the regex _should_ already trim whitespace off the end
140        //expecting_end is meant to permit: k=v #comment
141        //without affecting: k=v#comment
142        //and throwing on: k=v w
143        if expecting_end {
144            if c == ' ' || c == '\t' {
145                continue;
146            } else if c == '#' {
147                break;
148            } else {
149                return Err(Error::LineParse(input.to_owned(), index));
150            }
151        } else if escaped {
152            //TODO I tried handling literal \r but various issues
153            //imo not worth worrying about until there's a use case
154            //(actually handling backslash 0x10 would be a whole other matter)
155            //then there's \v \f bell hex... etc
156            match c {
157                '\\' | '\'' | '"' | '$' | ' ' => output.push(c),
158                'n' => output.push('\n'), // handle \n case
159                _ => {
160                    return Err(Error::LineParse(input.to_owned(), index));
161                }
162            }
163
164            escaped = false;
165        } else if strong_quote {
166            if c == '\'' {
167                strong_quote = false;
168            } else {
169                output.push(c);
170            }
171        } else if substitution_mode != SubstitutionMode::None {
172            if c.is_alphanumeric() {
173                substitution_name.push(c);
174            } else {
175                match substitution_mode {
176                    SubstitutionMode::None => unreachable!(),
177                    SubstitutionMode::Block => {
178                        if c == '{' && substitution_name.is_empty() {
179                            substitution_mode = SubstitutionMode::EscapedBlock;
180                        } else {
181                            apply_substitution(
182                                substitution_data,
183                                &substitution_name.drain(..).collect::<String>(),
184                                &mut output,
185                            );
186                            if c == '$' {
187                                substitution_mode = if !strong_quote && !escaped {
188                                    SubstitutionMode::Block
189                                } else {
190                                    SubstitutionMode::None
191                                }
192                            } else {
193                                substitution_mode = SubstitutionMode::None;
194                                output.push(c);
195                            }
196                        }
197                    }
198                    SubstitutionMode::EscapedBlock => {
199                        if c == '}' {
200                            substitution_mode = SubstitutionMode::None;
201                            apply_substitution(
202                                substitution_data,
203                                &substitution_name.drain(..).collect::<String>(),
204                                &mut output,
205                            );
206                        } else {
207                            substitution_name.push(c);
208                        }
209                    }
210                }
211            }
212        } else if c == '$' {
213            substitution_mode = if !strong_quote && !escaped {
214                SubstitutionMode::Block
215            } else {
216                SubstitutionMode::None
217            }
218        } else if weak_quote {
219            if c == '"' {
220                weak_quote = false;
221            } else if c == '\\' {
222                escaped = true;
223            } else {
224                output.push(c);
225            }
226        } else if c == '\'' {
227            strong_quote = true;
228        } else if c == '"' {
229            weak_quote = true;
230        } else if c == '\\' {
231            escaped = true;
232        } else if c == ' ' || c == '\t' {
233            expecting_end = true;
234        } else {
235            output.push(c);
236        }
237    }
238
239    //XXX also fail if escaped? or...
240    if substitution_mode == SubstitutionMode::EscapedBlock || strong_quote || weak_quote {
241        let value_length = input.len();
242        Err(Error::LineParse(
243            input.to_owned(),
244            if value_length == 0 {
245                0
246            } else {
247                value_length - 1
248            },
249        ))
250    } else {
251        apply_substitution(
252            substitution_data,
253            &substitution_name.drain(..).collect::<String>(),
254            &mut output,
255        );
256        Ok(output)
257    }
258}
259
/// Appends the value of the variable `substitution_name` to `output`.
///
/// The process environment takes precedence; if the variable cannot be read from it,
/// the value recorded in `substitution_data` is used. Unknown variables, keys
/// recorded without a value, and an empty name append nothing.
fn apply_substitution(
    substitution_data: &mut HashMap<String, Option<String>>,
    substitution_name: &str,
    output: &mut String,
) {
    // The value parser also calls this when no reference is pending; skip the
    // environment lookup in that case.
    if substitution_name.is_empty() {
        return;
    }

    if let Ok(environment_value) = env::var(substitution_name) {
        output.push_str(&environment_value);
    } else if let Some(Some(stored_value)) = substitution_data.get(substitution_name) {
        // Borrow the stored value instead of cloning it just to append it.
        output.push_str(stored_value);
    }
}
275
/// Line-level parsing tests: keys, quoting, comments and escapes.
#[cfg(test)]
mod test {
    use crate::iter::Iter;

    use super::*;

    #[test]
    fn test_parse_line_env() {
        // Note 5 spaces after 'KEY8=' below
        let actual_iter = Iter::new(
            r#"
KEY=1
KEY2="2"
KEY3='3'
KEY4='fo ur'
KEY5="fi ve"
KEY6=s\ ix
KEY7=
KEY8=     
KEY9=   # foo
KEY10  ="whitespace before ="
KEY11=    "whitespace after ="
export="export as key"
export   SHELL_LOVER=1
"#
            .as_bytes(),
        );

        let expected_iter = vec![
            ("KEY", "1"),
            ("KEY2", "2"),
            ("KEY3", "3"),
            ("KEY4", "fo ur"),
            ("KEY5", "fi ve"),
            ("KEY6", "s ix"),
            ("KEY7", ""),
            ("KEY8", ""),
            ("KEY9", ""),
            ("KEY10", "whitespace before ="),
            ("KEY11", "whitespace after ="),
            ("export", "export as key"),
            ("SHELL_LOVER", "1"),
        ]
        .into_iter()
        .map(|(key, value)| (key.to_string(), value.to_string()));

        // `zip` stops at the shorter side, so the count guards against missing lines.
        let mut count = 0;
        for (expected, actual) in expected_iter.zip(actual_iter) {
            assert!(actual.is_ok());
            assert_eq!(expected, actual.unwrap());
            count += 1;
        }

        assert_eq!(count, 13);
    }

    #[test]
    fn test_parse_line_comment() {
        let result: Result<Vec<(String, String)>> = Iter::new(
            r#"
# foo=bar
#    "#
                .as_bytes(),
        )
        .collect();
        assert!(result.unwrap().is_empty());
    }

    #[test]
    fn test_parse_line_invalid() {
        // Note 4 spaces after 'invalid' below
        let actual_iter = Iter::new(
            r#"
  invalid    
very bacon = yes indeed
=value"#
                .as_bytes(),
        );

        let mut count = 0;
        for actual in actual_iter {
            assert!(actual.is_err());
            count += 1;
        }
        assert_eq!(count, 3);
    }

    #[test]
    fn test_parse_value_escapes() {
        let actual_iter = Iter::new(
            r#"
KEY=my\ cool\ value
KEY2=\$sweet
KEY3="awesome stuff \"mang\""
KEY4='sweet $\fgs'\''fds'
KEY5="'\"yay\\"\ "stuff"
KEY6="lol" #well you see when I say lol wh
KEY7="line 1\nline 2"
"#
            .as_bytes(),
        );

        let expected_iter = vec![
            ("KEY", r#"my cool value"#),
            ("KEY2", r#"$sweet"#),
            ("KEY3", r#"awesome stuff "mang""#),
            ("KEY4", r#"sweet $\fgs'fds"#),
            ("KEY5", r#"'"yay\ stuff"#),
            ("KEY6", "lol"),
            ("KEY7", "line 1\nline 2"),
        ]
        .into_iter()
        .map(|(key, value)| (key.to_string(), value.to_string()));

        for (expected, actual) in expected_iter.zip(actual_iter) {
            assert!(actual.is_ok());
            assert_eq!(expected, actual.unwrap());
        }
    }

    #[test]
    fn test_parse_value_escapes_invalid() {
        let actual_iter = Iter::new(
            r#"
KEY=my uncool value
KEY2="why
KEY3='please stop''
KEY4=h\8u
"#
            .as_bytes(),
        );

        for actual in actual_iter {
            assert!(actual.is_err());
        }
    }
}
413
/// Tests for `$NAME` / `${NAME}` references inside values.
#[cfg(test)]
mod variable_substitution_tests {
    use crate::iter::Iter;
    use std::env;

    /// Parses `input_string` and checks that it yields exactly the expected pairs,
    /// in order.
    fn assert_parsed_string(input_string: &str, expected_parse_result: Vec<(&str, &str)>) {
        let actual_iter = Iter::new(input_string.as_bytes());
        let expected_count = expected_parse_result.len();

        let expected_iter = expected_parse_result
            .into_iter()
            .map(|(key, value)| (key.to_string(), value.to_string()));

        // `zip` stops at the shorter side, so the count guards against missing lines.
        let mut count = 0;
        for (expected, actual) in expected_iter.zip(actual_iter) {
            assert!(actual.is_ok());
            assert_eq!(expected, actual.unwrap());
            count += 1;
        }

        assert_eq!(count, expected_count);
    }

    #[test]
    fn variable_in_parenthesis_surrounded_by_quotes() {
        assert_parsed_string(
            r#"
            KEY=test
            KEY1="${KEY}"
            "#,
            vec![("KEY", "test"), ("KEY1", "test")],
        );
    }

    #[test]
    fn substitute_undefined_variables_to_empty_string() {
        assert_parsed_string(r#"KEY=">$KEY1<>${KEY2}<""#, vec![("KEY", "><><")]);
    }

    #[test]
    fn do_not_substitute_variables_with_dollar_escaped() {
        assert_parsed_string(
            "KEY=>\\$KEY1<>\\${KEY2}<",
            vec![("KEY", ">$KEY1<>${KEY2}<")],
        );
    }

    #[test]
    fn do_not_substitute_variables_in_weak_quotes_with_dollar_escaped() {
        assert_parsed_string(
            r#"KEY=">\$KEY1<>\${KEY2}<""#,
            vec![("KEY", ">$KEY1<>${KEY2}<")],
        );
    }

    #[test]
    fn do_not_substitute_variables_in_strong_quotes() {
        assert_parsed_string("KEY='>${KEY1}<>$KEY2<'", vec![("KEY", ">${KEY1}<>$KEY2<")]);
    }

    #[test]
    fn same_variable_reused() {
        assert_parsed_string(
            r#"
    KEY=VALUE
    KEY1=$KEY$KEY
    "#,
            vec![("KEY", "VALUE"), ("KEY1", "VALUEVALUE")],
        );
    }

    #[test]
    fn with_dot() {
        assert_parsed_string(
            r#"
    KEY.Value=VALUE
    "#,
            vec![("KEY.Value", "VALUE")],
        );
    }

    #[test]
    fn recursive_substitution() {
        assert_parsed_string(
            r#"
            KEY=${KEY1}+KEY_VALUE
            KEY1=${KEY}+KEY1_VALUE
            "#,
            vec![("KEY", "+KEY_VALUE"), ("KEY1", "+KEY_VALUE+KEY1_VALUE")],
        );
    }

    #[test]
    fn variable_without_parenthesis_is_substituted_before_separators() {
        assert_parsed_string(
            r#"
            KEY1=test_user
            KEY1_1=test_user_with_separator
            KEY=">$KEY1_1<>$KEY1}<>$KEY1{<"
            "#,
            vec![
                ("KEY1", "test_user"),
                ("KEY1_1", "test_user_with_separator"),
                ("KEY", ">test_user_1<>test_user}<>test_user{<"),
            ],
        );
    }

    #[test]
    fn substitute_variable_from_env_variable() {
        env::set_var("KEY11", "test_user_env");

        assert_parsed_string(r#"KEY=">${KEY11}<""#, vec![("KEY", ">test_user_env<")]);
    }

    #[test]
    fn substitute_variable_env_variable_overrides_dotenv_in_substitution() {
        env::set_var("KEY11", "test_user_env");

        assert_parsed_string(
            r#"
    KEY11=test_user
    KEY=">${KEY11}<"
    "#,
            vec![("KEY11", "test_user"), ("KEY", ">test_user_env<")],
        );
    }

    #[test]
    fn consequent_substitutions() {
        assert_parsed_string(
            r#"
    KEY1=test_user
    KEY2=$KEY1_2
    KEY=>${KEY1}<>${KEY2}<
    "#,
            vec![
                ("KEY1", "test_user"),
                ("KEY2", "test_user_2"),
                ("KEY", ">test_user<>test_user_2<"),
            ],
        );
    }

    #[test]
    fn consequent_substitutions_with_one_missing() {
        assert_parsed_string(
            r#"
    KEY2=$KEY1_2
    KEY=>${KEY1}<>${KEY2}<
    "#,
            vec![("KEY2", "_2"), ("KEY", "><>_2<")],
        );
    }
}
569
/// Tests for the text and position reported by parse errors.
#[cfg(test)]
mod error_tests {
    use crate::errors::Error::LineParse;
    use crate::iter::Iter;

    #[test]
    fn should_not_parse_unfinished_substitutions() {
        let wrong_value = ">${KEY{<";

        let parsed_values: Vec<_> = Iter::new(
            format!(
                r#"
    KEY=VALUE
    KEY1={}
    "#,
                wrong_value
            )
            .as_bytes(),
        )
        .collect();

        assert_eq!(parsed_values.len(), 2);

        if let Ok(first_line) = &parsed_values[0] {
            assert_eq!(first_line, &(String::from("KEY"), String::from("VALUE")))
        } else {
            panic!("Expected the first value to be parsed")
        }

        if let Err(LineParse(second_value, index)) = &parsed_values[1] {
            assert_eq!(second_value, wrong_value);
            assert_eq!(*index, wrong_value.len() - 1)
        } else {
            panic!("Expected the second value not to be parsed")
        }
    }

    #[test]
    fn should_not_allow_dot_as_first_character_of_key() {
        let wrong_key_value = ".Key=VALUE";

        let parsed_values: Vec<_> = Iter::new(wrong_key_value.as_bytes()).collect();

        assert_eq!(parsed_values.len(), 1);

        if let Err(LineParse(failed_line, index)) = &parsed_values[0] {
            assert_eq!(failed_line, wrong_key_value);
            assert_eq!(*index, 0)
        } else {
            panic!("Expected the line not to be parsed")
        }
    }

    #[test]
    fn should_not_parse_illegal_format() {
        let wrong_format = r"<><><>";
        let parsed_values: Vec<_> = Iter::new(wrong_format.as_bytes()).collect();

        assert_eq!(parsed_values.len(), 1);

        if let Err(LineParse(failed_line, index)) = &parsed_values[0] {
            assert_eq!(failed_line, wrong_format);
            assert_eq!(*index, 0)
        } else {
            panic!("Expected the line not to be parsed")
        }
    }

    #[test]
    fn should_not_parse_illegal_escape() {
        let wrong_escape = r">\f<";
        let parsed_values: Vec<_> =
            Iter::new(format!("VALUE={}", wrong_escape).as_bytes()).collect();

        assert_eq!(parsed_values.len(), 1);

        // The error carries the value part only, and points at the character that
        // follows the backslash.
        if let Err(LineParse(failed_value, index)) = &parsed_values[0] {
            assert_eq!(failed_value, wrong_escape);
            assert_eq!(*index, wrong_escape.find('\\').unwrap() + 1)
        } else {
            panic!("Expected the value not to be parsed")
        }
    }
}