Syntax highlighting C# with newer language version

Is there an updated version of CodeMirror 5 (yes, I know that CodeMirror 6 is the latest version) that supports newer language features in C# (for example C# 10)?

For example something like string interpolation is currently not correctly syntax highlighted.

var test = $"{somvariable} some text";

Also, some new keywords are not syntax highlighted.

Examples for string interpolation can be found here

It doesn’t look like that’s been implemented, no.

Is this difficult to implement? I’m not a JavaScript expert, but I have written tokenizers in C# before.

Is this the place to be? Any guidance would be helpful.

And is there another syntax highlighter in CodeMirror 5 that has this implemented? … so that I can copy and paste parts of it.

I found this but have no idea where to find this in the source code → [JSX] JavaScript string interpolation breaks on HTML string · Issue #4908 · codemirror/codemirror5 · GitHub

It looks like the Kotlin mode (also implemented in clike.js) has support for some kind of string interpolation, but that doesn’t appear to go back to the base tokenizer inside ${}. I’m not sure the clike framework, as it is, allows for that kind of nesting magic.

If I modify the clike.js to this

hooks: {
  "@": function(stream, state) {
    if (stream.eat('"')) {
      state.tokenize = tokenAtString;
      return tokenAtString(stream, state);
    }
    stream.eatWhile(/[\w\$_]/);
    return "meta";
  },
  '$': function(stream, state) {
    state.tokenize = tokenCSharpInterpolationString(stream.match('$'));
    return state.tokenize(stream, state);
  }
}

});

/**
 * Builds a tokenizer for the body of an interpolated string.
 *
 * The returned tokenizer consumes characters up to and including the string
 * terminator (or to the end of the line) and always returns "string".
 *
 * NOTE(review): the first unescaped `"` the loop meets is treated as the
 * terminator, so the caller must already have consumed the opening quote.
 *
 * @param {boolean} tripleString - truthy when the string ends with `"""`
 *   instead of a single `"`; backslash escapes are ignored in that case.
 * @returns {function(stream, state): string} tokenizer to store in
 *   `state.tokenize`.
 */
function tokenCSharpInterpolationString(tripleString) {
  return function (stream, state) {
    var escaped = false, next, end = false;
    while (!stream.eol()) {
      if (!tripleString && !escaped && stream.match('"')) { end = true; break; }
      if (tripleString && stream.match('"""')) { end = true; break; }
      next = stream.next();
      // Jump over an interpolation hole so a quote inside `${...}` does not
      // end the string; the closing brace itself is consumed by the next pass.
      if (!escaped && next == "$" && stream.match('{'))
        stream.skipTo("}");
      escaped = !escaped && next == "\\" && !tripleString;
    }
    // A single-quoted string never spans lines, so the tokenizer is always
    // dropped; a triple-quoted one stays active until `"""` is found.
    if (end || !tripleString)
      state.tokenize = null;
    return "string";
  };
}

It turns into this (which looks not bad at all for just copying it):

But I expected it to do this

(Screenshot from Visual Studio 2022)

The “test” word is incorrect (that is just a string) and the : string.Empty) is incorrect and the last ) … for the rest it works.

I have been playing around with the string interpolation and this seems to work as I wanted (and almost the same as what Visual Studio from Microsoft does)

// Passes the "text/x-csharp" MIME type and a clike-based configuration to def().
// The word lists below are the C# keywords, built-in type names, block-opening
// keywords, definition keywords and atoms handed to the mode.
def("text/x-csharp", {
    name: "clike",
    keywords: words("abstract as async await base break case catch checked class const continue" +
                    " default delegate do else enum event explicit extern finally fixed for" +
                    " foreach goto if implicit in init interface internal is lock namespace new" +
                    " operator out override params private protected public readonly record ref required return sealed" +
                    " sizeof stackalloc static struct switch this throw try typeof unchecked" +
                    " unsafe using virtual void volatile while add alias ascending descending dynamic from get" +
                    " global group into join let orderby partial remove select set value var yield"),
    types: words("Action Boolean Byte Char DateTime DateTimeOffset Decimal Double Func" +
                 " Guid Int16 Int32 Int64 Object SByte Single String Task TimeSpan UInt16 UInt32" +
                 " UInt64 bool byte char decimal double short int long object"  +
                 " sbyte float string ushort uint ulong"),
    blockKeywords: words("catch class do else finally for foreach if struct switch try while"),
    defKeywords: words("class interface namespace record struct var"),
    typeFirstDefinitions: true,
    atoms: words("true false null"),
    // Per-character hooks, keyed by the character that triggers them.
    hooks: {
      // "@" followed by a double quote switches to tokenAtString; any other
      // "@" consumes the following word characters and is styled "meta".
      "@": function(stream, state) {
        if (stream.eat('"')) {
          state.tokenize = tokenAtString;
          return tokenAtString(stream, state);
        }
        stream.eatWhile(/[\w\$_]/);
        return "meta";
      },
      // "$" only starts an interpolated string when a double quote follows.
      // Otherwise it returns false, presumably so the caller falls back to its
      // default handling (confirm against the clike hook contract).
      '$': function(stream, state) {
          if (stream.peek() !== '"') return false;
        // Start outside any interpolation hole and before the opening quote.
        state.tokenize = tokenStringInterpolation(false, false);
        return state.tokenize(stream, state);
      }
    }
  });

    /**
     * Builds a tokenizer for a C# interpolated string (`$"..."`).
     *
     * Each call of the returned tokenizer consumes one character (two for a
     * doubled brace, or everything up to the next `}` inside a hole) and
     * stores the tokenizer for the following token in `state.tokenize`.
     *
     * @param {boolean} interpolating - true when the previous token was the
     *   `{` that opens an interpolation hole.
     * @param {boolean} insideString - true once the opening quote has been
     *   consumed, so the next `"` closes the string.
     * @returns {function(stream, state): string} tokenizer returning
     *   "string", "variable" or "" (unstyled).
     */
    function tokenStringInterpolation(interpolating, insideString) {
        return function (stream, state) {
            if (stream.eol()) return "string";

            var ch = stream.next();

            if (interpolating) {
                if (ch !== "(") {
                    // Treat everything up to the closing brace as one expression token.
                    stream.skipTo("}");
                    state.tokenize = tokenStringInterpolation(false, insideString);
                    return "variable";
                }
                // A hole that starts with "(" is handed back to the base tokenizer.
                state.tokenize = null;
                return "";
            }

            if (ch === '"') {
                // The opening quote enters the string; the closing quote leaves
                // it. Both quotes belong to the literal and are styled as such.
                state.tokenize = insideString ? null : tokenStringInterpolation(false, true);
                return "string";
            }

            if (ch === "{" && stream.peek() !== "{") {
                state.tokenize = tokenStringInterpolation(true, insideString);
                return "";
            }

            if (ch === "}" && stream.peek() !== "}") {
                state.tokenize = tokenStringInterpolation(false, insideString);
                return "";
            }

            // "{{" and "}}" are escaped braces: consume the second one as string text.
            if (ch === "{" || ch === "}") stream.next();

            state.tokenize = tokenStringInterpolation(false, insideString);
            return "string";
        };
    }

The only thing that I can’t get right is this char … maybe you have an idea. And if you have some tips to improve the code, I’m happy to hear them, because this is the first time I have made something like this for CodeMirror.

The " should also be colored as a string

I would think the return null at the end of the if (ch == '"') clause is what determines the lack of highlighting for that quote.

I now hook the $ char to do string interpolation. Is there an option to hook any char and use a condition to decide whether I want to use that hook?

blockKeywords: words("catch class do else finally for foreach if struct switch try while"),
defKeywords: words("class interface namespace record struct var"),
typeFirstDefinitions: true,
atoms: words("true false null"),
hooks: {
  "@": function(stream, state) {
    if (stream.eat('"')) {
      state.tokenize = tokenAtString;
      return tokenAtString(stream, state);
    }
    stream.eatWhile(/[\w\$_]/);
    return "meta";
  },
  '$': function(stream, state) {
      if (stream.peek() !== '"') return false;
    state.tokenize = tokenStringInterpolation(false, false);
    return state.tokenize(stream, state);
  }
}

});

I got string interpolation working perfectly in CodeMirror 5, but I had to use an external variable to keep track of the nesting level inside an interpolated string. It looks kind of ugly this way. Is there a better way to do this? … For the rest it works perfectly, even with nesting inside nesting inside nesting.

If you want to put this inside codemirror 5 then feel free to use it :slight_smile:

// CodeMirror, copyright (c) by Marijn Haverbeke and others
// Distributed under an MIT license: https://codemirror.net/5/LICENSE
// Number of interpolation holes currently open; incremented and decremented
// by tokenStringInterpolation and read by the "{" and "}" hooks below.
// NOTE(review): this is a module-level variable rather than part of the mode
// state, so it is presumably shared by every document using this mode and is
// not restored when highlighting restarts from a saved state (confirm).
var interpolatingLevel = 0;


// Passes the "text/x-csharp" MIME type and a clike-based configuration to def().
def("text/x-csharp", {
    name: "clike",
    keywords: words("abstract as async await base break case catch checked class const continue" +
                    " default delegate do else enum event explicit extern finally fixed for" +
                    " foreach goto if implicit in init interface internal is lock namespace new" +
                    " operator out override params private protected public readonly record ref required return sealed" +
                    " sizeof stackalloc static struct switch this throw try typeof unchecked" +
                    " unsafe using virtual void volatile while add alias ascending descending dynamic from get" +
                    " global group into join let orderby partial remove select set value var yield"),
    types: words("Action Boolean Byte Char DateTime DateTimeOffset Decimal Double Func" +
                 " Guid Int16 Int32 Int64 Object SByte Single String Task TimeSpan UInt16 UInt32" +
                 " UInt64 bool byte char decimal double short int long object"  +
                 " sbyte float string ushort uint ulong"),
    blockKeywords: words("catch class do else finally for foreach if struct switch try while"),
    defKeywords: words("class interface namespace record struct var"),
    typeFirstDefinitions: true,
    atoms: words("true false null"),
      // Per-character hooks, keyed by the character that triggers them.
      hooks: {
          // "@" followed by a double quote switches to tokenAtString; any other
          // "@" consumes the following word characters and is styled "meta".
          "@": function (stream, state) {
              if (stream.eat('"')) {
                  state.tokenize = tokenAtString;
                  return tokenAtString(stream, state);
              }
              stream.eatWhile(/[\w\$_]/);
              return "meta";
          },
          // "$" followed by a double quote starts an interpolated string and
          // resets the hole counter; any other "$" returns false.
          "$": function (stream, state) {
              if (stream.peek() !== '"') return false;
              interpolatingLevel = 0;
              state.tokenize = tokenStringInterpolation(false);
              return state.tokenize(stream, state);
          },
          // While a hole is open, "}" switches back to the string tokenizer
          // (already inside the string) and is itself left unstyled.
          // NOTE(review): the counter is not decremented here — confirm intended.
          "}": function (stream, state) {
              if (interpolatingLevel == 0) return false;
              state.tokenize = tokenStringInterpolation(true);
              return null;
          },
          // While a hole is open, "{" hands control to a string tokenizer that
          // has not yet seen an opening quote.
          "{": function (stream, state) {
              if (interpolatingLevel == 0) return false;
              state.tokenize = tokenStringInterpolation(false);
              return state.tokenize(stream, state);
          }
      }
  });

    /**
     * Builds a tokenizer for the literal parts of a C# interpolated string.
     *
     * The returned tokenizer emits one token per call: a run of ordinary
     * characters ("string"), a quote ("string"), an escaped brace pair
     * ("string-2"), or a single brace that opens or closes an interpolation
     * hole (unstyled). For a single brace it adjusts `interpolatingLevel`,
     * a counter declared outside this function, and clears `state.tokenize`
     * so the base tokenizer handles the expression inside the hole.
     *
     * @param {boolean} insideString - true once the opening quote has been
     *   consumed, so the next `"` closes the string.
     * @returns {function(stream, state): (string|null)} tokenizer to store in
     *   `state.tokenize`.
     */
    function tokenStringInterpolation(insideString) {
        // Anything that is not a quote or a brace is plain string text.
        const ordinaryChar = /[^"{}]/;
        return function (stream, state) {
            if (stream.eol()) return "string";

            // Consume a whole run of ordinary characters as a single token
            // instead of producing one token (and one closure) per character.
            if (stream.eatWhile(ordinaryChar)) {
                state.tokenize = tokenStringInterpolation(insideString);
                return "string";
            }

            // Only '"', '{' or '}' can be next at this point.
            var ch = stream.next();

            if (ch === '"') {
                // Opening quote enters the string; closing quote ends it.
                state.tokenize = insideString ? null : tokenStringInterpolation(true);
                return "string";
            }

            if (stream.peek() === ch) {
                // '{{' and '}}' are escaped braces, not interpolation holes.
                stream.next();
                state.tokenize = tokenStringInterpolation(insideString);
                return "string-2";
            }

            // A single brace opens or closes a hole.
            interpolatingLevel += (ch === "{") ? 1 : -1;
            state.tokenize = null;
            return null;
        };
    }

  /**
   * Tokenizer for the inside of a `"""`-delimited string.
   *
   * Reads to the end of the line, or up to and including the first `"""`
   * that is not preceded by an unpaired backslash; in the latter case it
   * clears `state.tokenize`. Always returns "string".
   */
  function tokenTripleString(stream, state) {
    for (var afterBackslash = false; !stream.eol();) {
      if (!afterBackslash && stream.match('"""')) {
        state.tokenize = null;
        return "string";
      }
      var current = stream.next();
      // A backslash escapes the next character unless it was itself escaped.
      afterBackslash = !afterBackslash && current == "\\";
    }
    return "string";
  }

  /**
   * Builds a tokenizer for block comments that may nest.
   *
   * @param depth - number of comment openers currently unclosed.
   * @returns tokenizer that reads to the end of the line or to the `*` `/`
   *   that closes the outermost comment, keeping `state.tokenize` in sync
   *   with the current nesting depth. Always returns "comment".
   */
  function tokenNestedComment(depth) {
    return function (stream, state) {
      var level = depth;
      var current;
      while (current = stream.next()) {
        if (current == "*" && stream.eat("/")) {
          if (level == 1) {
            // Outermost comment closed: hand control back to the base tokenizer.
            state.tokenize = null;
            break;
          }
          level = level - 1;
          state.tokenize = tokenNestedComment(level);
        } else if (current == "/" && stream.eat("*")) {
          level = level + 1;
          state.tokenize = tokenNestedComment(level);
        }
      }
      return "comment";
    };
  }

This is how it works now in the latest CodeMirror 5 version

And this is what Visual Studio 2022 is making of it

There are some color differences, but that is just because of the color schemes used