1

In my Web Api project, I have a [HttpPost] method - public HttpResponseMessage saveFiles() {} which saves some audio files to the server. after I save the files, I need to call a method in the Microsoft.Speech server api, this method is async but it returns void:

public void RecognizeAsync(RecognizeMode mode);

I would want to wait until this method is finished and just then return an answer to the client with all the information I gathered. I can not use await here because this function returns void. I implemented an event: public event RecognitionFinishedHandler RecognitionFinished;

This event is called when this function is finished.

-- EDIT I am wrapping this event with a Task, but I guess I am doing something wrong because I can not get the RecognizeAsync function to actually do its job. it seems that the function is not working now, here is my code:

the functions containing the Speech recognition:

public delegate void RecognitionFinishedHandler(object sender);
public class SpeechActions
{
    public event RecognitionFinishedHandler RecognitionFinished;
    private SpeechRecognitionEngine sre;
    public Dictionary<string, List<TimeSpan>> timeTags; // contains the times of each tag: "tag": [00:00, 00:23 .. ]

    public SpeechActions()
    {
        sre = new SpeechRecognitionEngine(new System.Globalization.CultureInfo("en-US"));
        sre.SpeechRecognized += new EventHandler<SpeechRecognizedEventArgs>(sre_SpeechRecognized);
        sre.AudioStateChanged += new EventHandler<AudioStateChangedEventArgs>(sre_AudioStateChanged);
    }

    /// <summary>
    /// Calculates the tags appearances in a voice over wav file.
    /// </summary>
    /// <param name="path">The path to the voice over wav file.</param>
    public void CalcTagsAppearancesInVO(string path, string[] tags, TimeSpan voLength)
    {
        timeTags = new Dictionary<string, List<TimeSpan>>();
        sre.SetInputToWaveFile(path);

        foreach (string tag in tags)
        {
            GrammarBuilder gb = new GrammarBuilder(tag);
            gb.Culture = new System.Globalization.CultureInfo("en-US");
            Grammar g = new Grammar(gb);
            sre.LoadGrammar(g);
        }

        sre.RecognizeAsync(RecognizeMode.Multiple);
    }

    void sre_AudioStateChanged(object sender, AudioStateChangedEventArgs e)
    {
        if (e.AudioState == AudioState.Stopped)
        {
            sre.RecognizeAsyncStop();
            if (RecognitionFinished != null)
            {
                RecognitionFinished(this);
            }
        }
    }

    void sre_SpeechRecognized(object sender, SpeechRecognizedEventArgs e)
    {
        string word = e.Result.Text;
        TimeSpan time = e.Result.Audio.AudioPosition;
        if(!timeTags.ContainsKey(word))
        {
            timeTags.Add(word, new List<TimeSpan>());
        } 

        // add the found time
        timeTags[word].Add(time);
    }
}

and my function that calls it + the event hander:

[HttpPost]
    public HttpResponseMessage saveFiles()
    {
        if (HttpContext.Current.Request.Files.AllKeys.Any())
        {
            string originalFolder = HttpContext.Current.Server.MapPath("~/files/original/");
            string lowFolder = HttpContext.Current.Server.MapPath("~/files/low/");
            string audioFolder = HttpContext.Current.Server.MapPath("~/files/audio/");
            string voiceoverPath = Path.Combine(originalFolder, Path.GetFileName(HttpContext.Current.Request.Files["voiceover"].FileName));
            string outputFile = HttpContext.Current.Server.MapPath("~/files/output/") + "result.mp4";
            string voiceoverWavPath = Path.Combine(audioFolder, "voiceover.wav");
            var voiceoverInfo = Resource.From(voiceoverWavPath).LoadMetadata().Streams.OfType<AudioStream>().ElementAt(0).Info;
            DirectoryInfo di = new DirectoryInfo(originalFolder);
            // speech recognition
            // get tags from video filenames
            string sTags = "";
            di = new DirectoryInfo(HttpContext.Current.Server.MapPath("~/files/low/"));

            foreach (var item in di.EnumerateFiles())
            {
                string filename = item.Name.Substring(0, item.Name.LastIndexOf("."));
                if (item.Name.ToLower().Contains("thumbs") || filename == "voiceover")
                {
                    continue;
                }
                sTags += filename + ",";
            }
            if (sTags.Length > 0) // remove last ','
            {
                sTags = sTags.Substring(0, sTags.Length - 1);
            }
            string[] tags = sTags.Split(new char[] { ',' });

            // HERE STARTS THE PROBLEMATIC PART! ----------------------------------------------------
            var task = GetSpeechActionsCalculated(voiceoverWavPath, tags, voiceoverInfo.Duration);

            // now return the times to the client
            var finalTimes = GetFinalTimes(HttpContext.Current.Server.MapPath("~/files/low/"), task.Result.timeTags);
            var goodResponse = Request.CreateResponse(HttpStatusCode.OK, finalTimes);
            return goodResponse;
        }
        return Request.CreateResponse(HttpStatusCode.OK, "no files");
    }
    private Task<SpeechActions> GetSpeechActionsCalculated(string voPath, string[] tags, TimeSpan voLength)
    {
        var tcs = new TaskCompletionSource<SpeechActions>();
        SpeechActions sa = new SpeechActions();
        sa.RecognitionFinished += (s) =>
        {
            tcs.TrySetResult((SpeechActions)s);
        };
        sa.CalcTagsAppearancesInVO(voPath, tags, voLength);

        return tcs.Task;
    }
4

1 Answer 1

2

You're almost there with your edit, you just have to await the task:

[HttpPost]
public async Task<HttpResponseMessage> saveFiles()
{
    if (HttpContext.Current.Request.Files.AllKeys.Any())
    {
        ...

        string[] tags = sTags.Split(new char[] { ',' });

        await GetSpeechActionsCalculated(voiceoverWavPath, tags, voiceoverInfo.Duration);

        // now return the times to the client
        var finalTimes = GetFinalTimes(HttpContext.Current.Server.MapPath("~/files/low/"), task.Result.timeTags);
        var goodResponse = Request.CreateResponse(HttpStatusCode.OK, finalTimes);
        return goodResponse;
    }
    return Request.CreateResponse(HttpStatusCode.OK, "no files");
}
Sign up to request clarification or add additional context in comments.

1 Comment

thanks a lot @stephencleary, it works now. I am facing some issues with the Speech API, but at least I can wait for the speech recognition to end.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.