About Me

A career professional with 19 years of experience in application development, solution architecture, management, and strategy. Architected, planned, and executed many programs, projects, and solutions, delivering valued results to business clients and customers. Able to provide creative solutions to problems and to articulate the value proposition to upper management and the technical design to IT staff. Collaborates across teams and other facets of IT (i.e., operations, infrastructure, security) to validate solutions for completeness. Interfaces with clients to gather feedback, advise, and propose solutions in the language of the business.

Monday, November 7, 2016

Azure Function and IBM BlueMix Auto Image Tagger

Lately I have been looking a lot into Azure Functions. I just completed a proof of concept (POC) for an idea that I had. Anyone who has ever tried to get users to tag documents with metadata knows that it's a challenge. We have all just thrown documents into the SharePoint ether with no metadata.

Metadata for documents has huge advantages: for starters, search works much better, and you can implement more advanced functionality.

Disclaimer - The code is provided as is and it is in a POC state.
The main thing that I wanted to achieve is testing out the architectural principles.
Some of the code needs to be refactored to be dynamic to use in a production setting.
I'll do a second cut of this blog with more details about all the steps that I went through to get everything up and running.


Usage: Have a timer/cron process wake up periodically and auto-tag picture files in a designated picture library. For the timer process, an Azure Function was used. It was the perfect fit for this use case. For the auto-tagging I used the visual recognition service on IBM's BlueMix platform. You send the service an image and it uses AI to do image recognition and classify the image. After the results are returned, the classifications supplied are written back to the SharePoint image library.


IBM BlueMix Resources
  • Visual Recognition
    • http://www.ibm.com/watson/developercloud/visual-recognition.html
  • Watson API Explorer
    • http://www.ibm.com/watson/developercloud/visual-recognition/api/v3/#introduction
Azure Resources
  • Azure Functions Intro Site
    • https://azure.microsoft.com/en-us/services/functions/
  • Azure Functions Site
    • https://functions.azure.com/try?correlationId=a4f806de-be86-4732-88c2-a01525e1cc4e

Logical Architecture


Sequence Diagram

Code from Azure Function

#r "System.Xml.Linq"
#r "Newtonsoft.Json"

using System;
using System.IO;
using System.Net;
using System.Text;
using System.Collections.Specialized;
using System.Collections.Generic;
using System.Xml;
using Newtonsoft.Json;
using SP=Microsoft.SharePoint.Client;


/// <summary>
/// Deserialization target for the JSON returned by the visual recognition
/// classify call. Property names are kept in the service's lower-case /
/// snake_case form so Json.NET can bind them by name.
/// </summary>
public class ImageClassification
{
    /// <summary>Result entry for one submitted image.</summary>
    public class Images
    {
        /// <summary>Classifiers that produced results for this image.</summary>
        public IList<classifierList> classifiers { get; set; }
        public string image { get; set; }

        /// <summary>One classifier together with the classes it reported.</summary>
        public class classifierList
        {
            /// <summary>A single class label and its score.</summary>
            public class classListItem
            {
                // "class" is a C# keyword, so the JSON member is mapped explicitly.
                [JsonProperty("class")]
                public string classItem { get; set; }
                public double score { get; set; }
            }
            public string classifier_id { get; set; }
            public string name { get; set; }
            /// <summary>Classes reported by this classifier.</summary>
            public IList<classListItem> classes { get; set; }
        }

    }
    public int custom_classes { get; set; }
    /// <summary>One entry per image in the response.</summary>
    public IList<Images> images { get; set; }
    public int images_processed { get; set; }
}

/// <summary>
/// Plain data holder describing one library file that is queued for tagging.
/// </summary>
public class FileItem
{
    /// <summary>
    /// Creates a work item; every argument is stored unchanged in the
    /// property of the matching name.
    /// </summary>
    /// <param name="Id">List item id of the file.</param>
    /// <param name="fileName">File name of the item.</param>
    /// <param name="fileGuid">GUID of the list item, as text.</param>
    /// <param name="relativeUrl">Directory reference of the file.</param>
    /// <param name="keywords">Keywords currently associated with the file.</param>
    public FileItem(int Id, string fileName, string fileGuid, string relativeUrl, string keywords)
    {
        this.ID = Id;
        this.FileName = fileName;
        this.FileGuid = fileGuid;
        this.RelativeUrl = relativeUrl;
        this.Keywords = keywords;
    }

    /// <summary>List item id.</summary>
    public int ID { get; set; }

    /// <summary>File name of the item.</summary>
    public string FileName { get; set; }

    /// <summary>GUID of the list item, as text.</summary>
    public string FileGuid { get; set; }

    /// <summary>Directory reference of the file.</summary>
    public string RelativeUrl { get; set; }

    /// <summary>Keywords associated with the file.</summary>
    public string Keywords { get; set; }
}

/// <summary>
/// Static holder for the SharePoint site URL, the credentials used to reach
/// it, and a single shared <c>ClientContext</c> built from the two.
/// </summary>
public class SPContext
{
    private static Microsoft.SharePoint.Client.ClientContext clientCxt = null;

    /// <summary>Base URL of the SharePoint site.</summary>
    public static string SiteUrl { get; set; }

    /// <summary>Credentials applied to the client context and to file downloads.</summary>
    public static NetworkCredential SiteCredentials { get; set; }

    /// <summary>
    /// Stores the site URL and credentials and replaces the shared client
    /// context, disposing the previous one if there was one.
    /// </summary>
    /// <param name="siteUrl">Base URL of the SharePoint site.</param>
    /// <param name="siteCredentials">Credentials to use against that site.</param>
    public static void SetInstance(string siteUrl,NetworkCredential siteCredentials)
    {
        SiteCredentials = siteCredentials;
        SiteUrl = siteUrl;
        if (clientCxt != null)
        {
            clientCxt.Dispose();
            clientCxt = null;
        }

        clientCxt = new Microsoft.SharePoint.Client.ClientContext(SiteUrl);
        clientCxt.Credentials = SiteCredentials;
    }

    private SPContext() { }

    /// <summary>
    /// Returns the shared client context, creating it from
    /// <see cref="SiteUrl"/> and <see cref="SiteCredentials"/> on first use.
    /// </summary>
    public static Microsoft.SharePoint.Client.ClientContext GetInstance()
    {
        if (clientCxt == null)
        {
            clientCxt = new Microsoft.SharePoint.Client.ClientContext(SiteUrl);
            clientCxt.Credentials = SiteCredentials;
        }
        return clientCxt;
    }

    /// <summary>
    /// Builds "{SiteUrl}/{spLibraryName}/{fileName}" with exactly one slash
    /// between the segments, whether or not <see cref="SiteUrl"/> ends with a
    /// slash or the library name is wrapped in slashes.
    /// </summary>
    /// <param name="spLibraryName">URL segment of the document library.</param>
    /// <param name="fileName">File name inside the library; appended as given.</param>
    /// <returns>The combined URL as a string.</returns>
    /// <exception cref="InvalidOperationException">
    /// <see cref="SiteUrl"/> has not been set.
    /// </exception>
    internal static string GetAbsoluteFileUrl(string spLibraryName, string fileName)
    {
        if (string.IsNullOrEmpty(SiteUrl))
        {
            throw new InvalidOperationException("SPContext.SiteUrl has not been set; call SetInstance first.");
        }

        // Normalise both sides of each join so no "//" ends up in the path.
        StringBuilder fileUrl = new StringBuilder(SiteUrl.TrimEnd('/'));
        string librarySegment = (spLibraryName ?? string.Empty).Trim('/');
        if (librarySegment.Length > 0)
        {
            fileUrl.Append('/').Append(librarySegment);
        }
        fileUrl.Append('/').Append(fileName);
        return fileUrl.ToString();
    }
}

/// <summary>
/// Lazily created, single shared bag of settings for the function: target
/// library, logger and classification service URL.
/// </summary>
public class ConfigData
{
    private static ConfigData configData = null;

    private ConfigData() { }

    /// <summary>Name of the SharePoint library to process.</summary>
    public string SPLibrary { get; set; }

    /// <summary>Logger used by the processing methods.</summary>
    public TraceWriter Logger { get; set; }

    /// <summary>URL of the image classification service.</summary>
    public string ClassificationServiceUrl { get; set; }

    /// <summary>
    /// Returns the shared instance, creating an empty one on first call.
    /// </summary>
    public static ConfigData GetInstance()
    {
        return configData ?? (configData = new ConfigData());
    }

    /// <summary>
    /// Stores the library name and service URL on the shared instance,
    /// creating that instance first if needed. The logger is left untouched.
    /// </summary>
    /// <param name="library">Name of the SharePoint library.</param>
    /// <param name="classServiceUrl">URL of the classification service.</param>
    public static void Load(string library,string classServiceUrl)
    {
        ConfigData shared = GetInstance();
        shared.SPLibrary = library;
        shared.ClassificationServiceUrl = classServiceUrl;
    }
}


/// <summary>
/// Writes the first class of the first classifier of the first image in
/// <paramref name="imgClassification"/> into the "Keywords" field of the
/// list item identified by <paramref name="fileItem"/>.
/// When no such class is available (null result, empty lists, empty label)
/// the item is left untouched and the skip is logged.
/// </summary>
/// <param name="fileItem">Work item whose list entry is updated.</param>
/// <param name="imgClassification">Classification result; may be null.</param>
private static void TagFile(FileItem fileItem,ImageClassification imgClassification)
{
    ConfigData configData = ConfigData.GetInstance();
    TraceWriter log = configData.Logger;

    log.Info("TagFile-Start:");
    log.Info("TagFile-File:"+fileItem.ID + " / "+ fileItem.FileName);

    string topClass = GetTopClassification(imgClassification);
    if (string.IsNullOrEmpty(topClass))
    {
        // The classifier call returns null on failure; do not crash the run for one file.
        log.Info("TagFile-Skipped: no classification available for " + fileItem.FileName);
        log.Info("TagFile-End: ");
        return;
    }

    log.Info("TagFile-Classification:"+topClass);
    SP.ClientContext clientContext = SPContext.GetInstance();
    SP.List oList = clientContext.Web.Lists.GetByTitle(configData.SPLibrary);
    SP.ListItem oListItem = oList.GetItemById(fileItem.ID);

    //should add logic to check all of the classifications and scores
    //if the score is below a certain threshold then we should throw out the classification
    oListItem["Keywords"] = topClass;

    oListItem.Update();
    clientContext.ExecuteQuery();
    log.Info("TagFile-End: ");
}

/// <summary>
/// Returns the label of the first class of the first classifier of the first
/// image, or null when any level of that path is missing or empty.
/// </summary>
private static string GetTopClassification(ImageClassification imgClassification)
{
    if (imgClassification == null || imgClassification.images == null || imgClassification.images.Count == 0)
    {
        return null;
    }

    var firstImage = imgClassification.images[0];
    if (firstImage == null || firstImage.classifiers == null || firstImage.classifiers.Count == 0)
    {
        return null;
    }

    var firstClassifier = firstImage.classifiers[0];
    if (firstClassifier == null || firstClassifier.classes == null || firstClassifier.classes.Count == 0)
    {
        return null;
    }

    var firstClass = firstClassifier.classes[0];
    return firstClass == null ? null : firstClass.classItem;
}

/// <summary>
/// Posts the image held in <paramref name="memBuffer"/> to
/// <paramref name="webUrl"/> as multipart/form-data and deserializes the JSON
/// response into an <see cref="ImageClassification"/>.
/// </summary>
/// <param name="webUrl">Classification endpoint, including any query string.</param>
/// <param name="memBuffer">Image bytes; the whole buffer is sent regardless of its position. Not disposed here.</param>
/// <param name="fileItem">Supplies the file name placed in the part header.</param>
/// <param name="formFields">Optional plain form fields sent ahead of the file part; null sends the file part only.</param>
/// <returns>
/// The deserialized result, or null when uploading, reading or parsing the
/// response fails (the exception message is logged).
/// </returns>
private static ImageClassification PostDataReturnClassifier(string webUrl, MemoryStream memBuffer, FileItem fileItem, NameValueCollection formFields = null)
{
    ConfigData config = ConfigData.GetInstance();
    TraceWriter log = config.Logger;
    log.Info("PostDataReturnClassifier-Start: ");
    /* Thanks for all the coders from Stack Overflow */
    /* http://stackoverflow.com/questions/566462/upload-files-with-httpwebrequest-multipart-form-data */
    /* http://stackoverflow.com/questions/1688855/httpwebrequest-c-sharp-uploading-a-file */
    string boundary = "----------------------------" + DateTime.Now.Ticks.ToString("x");

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(webUrl);
    request.ContentType = "multipart/form-data; boundary=" + boundary;
    request.Method = "POST";
    //request.KeepAlive = true;
    request.ServicePoint.Expect100Continue = false;

    try
    {
        // The body is assembled in memory first because ContentLength must be set before sending.
        using (MemoryStream body = new MemoryStream())
        {
            byte[] partBoundary = Encoding.ASCII.GetBytes("--" + boundary + "\r\n");
            byte[] closingBoundary = Encoding.ASCII.GetBytes("\r\n--" + boundary + "--");

            if (formFields != null)
            {
                foreach (string key in formFields.AllKeys)
                {
                    if (key == null)
                    {
                        continue;
                    }

                    // Each field is its own part: boundary line, disposition header, blank line, value.
                    string field = string.Format("Content-Disposition: form-data; name=\"{0}\"\r\n\r\n{1}\r\n", key, formFields[key]);
                    byte[] fieldBytes = Encoding.UTF8.GetBytes(field);
                    body.Write(partBoundary, 0, partBoundary.Length);
                    body.Write(fieldBytes, 0, fieldBytes.Length);
                }
            }

            // NOTE: the part is always labelled image/jpeg, whatever the file actually is.
            string headerTemplate = "Content-Disposition: form-data; name=\"{0}\"; filename=\"{1}\"\r\n" + "Content-Type: image/jpeg\r\n\r\n";
            byte[] headerBytes = Encoding.UTF8.GetBytes(string.Format(headerTemplate, "uplTheFile", fileItem.FileName));

            body.Write(partBoundary, 0, partBoundary.Length);
            body.Write(headerBytes, 0, headerBytes.Length);
            memBuffer.WriteTo(body);
            body.Write(closingBoundary, 0, closingBoundary.Length);

            request.ContentLength = body.Length;
            using (Stream requestStream = request.GetRequestStream())
            {
                body.WriteTo(requestStream);
            }
        }

        log.Info("PostDataReturnClassifier-Posting to Service: ");
        using (WebResponse response = request.GetResponse())
        using (Stream streamRes = response.GetResponseStream())
        using (StreamReader readResult = new StreamReader(streamRes))
        {
            JsonSerializer serializer = new JsonSerializer();
            ImageClassification imgClass = (ImageClassification)serializer.Deserialize(readResult, typeof(ImageClassification));
            // If the response carries no image/classifier/class this line throws,
            // and the catch below turns that into a null return.
            log.Info("PostDataReturnClassifier-Result: "+imgClass.images[0].classifiers[0].classes[0].classItem);
            return imgClass;
        }
    }
    catch(Exception e)
    {
        log.Info("PostDataReturnClassifier-Error: "+e.Message);
        return null;
    }
}
        private static MemoryStream DownloadItem(FileItem workItem)
        {
            ConfigData config = ConfigData.GetInstance();
            TraceWriter log = config.Logger;
            log.Info("DownloadItem-Start: ");

            string webFileUrl = SPContext.GetAbsoluteFileUrl(config.SPLibrary, workItem.FileName);
            log.Info("DownloadItem-File: "+webFileUrl);
            WebRequest request = WebRequest.Create(webFileUrl);
            request.Credentials = SPContext.SiteCredentials;
            //request.AllowWriteStreamBuffering = true;
            request.Timeout = 30000; //this should come from a config settings
            MemoryStream memStream = new MemoryStream();
            log.Info("DownloadItem-Starting Downloaded ");
            using (WebResponse response = request.GetResponse())
            {
                // Display the status.
                //Console.WriteLine(((HttpWebResponse)response).StatusDescription);
                // Get the stream containing content returned by the server.
                log.Info("DownloadItem-Getting Downloaded ");
                using (Stream dataStream = response.GetResponseStream())
                {
                    byte[] buffer = new byte[1024];
                    int received = 0;

                    int size = dataStream.Read(buffer, 0, buffer.Length);
                    log.Info($"Got data: {DateTime.Now} bytes in buffer:" + size);

                    while (size > 0)
                    {
                        memStream.Write(buffer, 0, size);
                        received += size;
                        size = dataStream.Read(buffer, 0, buffer.Length);
                    }
                    log.Info("DownloadItem-Downloaded bytes:"+received);
                }
            }
            
            memStream.Flush();
            memStream.Position = 0; //reposition the memory pointer
            return memStream;
        }

/// <summary>
/// Queries the configured library (row limit 100) and returns a work item
/// for every list item whose "Keywords" field value is null.
/// </summary>
/// <returns>Untagged files as <see cref="FileItem"/> entries; empty when there are none.</returns>
private static List<FileItem> GetWorkItems()
{
    ConfigData config = ConfigData.GetInstance();
    TraceWriter log = config.Logger;
    SP.ClientContext clientContext = SPContext.GetInstance();

    SP.List oList = clientContext.Web.Lists.GetByTitle(config.SPLibrary);
    SP.CamlQuery camlQuery = new SP.CamlQuery();
    // ViewXml must be a CAML <View> element; RowLimit caps the number of items returned.
    camlQuery.ViewXml = "<View><RowLimit>100</RowLimit></View>";
    SP.ListItemCollection collListItem = oList.GetItems(camlQuery);
    clientContext.Load(collListItem);
    clientContext.ExecuteQuery();

    List<FileItem> workList = new List<FileItem>();
    foreach (SP.ListItem oListItem in collListItem)
    {
        log.Info("ID: "+ oListItem.Id +" \nFile Name: "+oListItem.FieldValues["FileLeafRef"]+" \nGUID: "+ oListItem.FieldValues["GUID"]);

        //Process items that don't have the keywords set
        if (oListItem.FieldValues["Keywords"] == null)
        {
            workList.Add(new FileItem(oListItem.Id, oListItem.FieldValues["FileLeafRef"].ToString(), oListItem.FieldValues["GUID"].ToString(), oListItem.FieldValues["FileDirRef"].ToString(), string.Empty));
        }
    }
    return workList;
}

/// <summary>
/// Runs one tagging pass: loads configuration, connects to SharePoint, then
/// downloads, classifies and tags every work item returned by
/// <c>GetWorkItems</c>. Items for which the classifier returns null are
/// logged and skipped. Expects <c>ConfigData.Logger</c> to be set beforehand.
/// </summary>
public static void ProcessEngine()
{
    TraceWriter log = ConfigData.GetInstance().Logger;
    log.Info($"ProcessEngine-Enter: {DateTime.Now} ");
    //Need to register with IBM BlueMix to get URL and apikey for visual-recognition service
    // NOTE(review): the API key, site URL and credentials below are placeholders
    // hard-coded in source; move them to configuration before production use.
    string watsonImgRecUrl = "https://gateway-EX.watsonplatform.net/visual-recognition/api/v3/classify?api_key=123456789&version=2016-05-20";
    log.Info($"ProcessEngine-: Loading Config data ");

    ConfigData.Load("pics", watsonImgRecUrl);
    ConfigData configData = ConfigData.GetInstance();

    log.Info($"ProcessEngine-: Setting SPContext Instance ");
    //this for SharePoint on-premise that uses IWA for authentication (will add other authentication schemes later)
    SPContext.SetInstance("http://SomeSharePointOnPremise.sample.com/site/SiteEx", new NetworkCredential("MyUserName", "MyPassword", "MyDomain"));

    log.Info($"ProcessEngine-: Getting Work Items ");
    foreach(FileItem fileItem in GetWorkItems())
    {
        //log what you're processing
        log.Info($"ProcessEngine-: Processing Work Item ... " + fileItem.FileName + " / "+ fileItem.FileGuid);

        log.Info($"ProcessEngine-: Downloading... ");
        // The downloaded bytes are only needed for the upload, so release them per item.
        using (MemoryStream memStream = DownloadItem(fileItem))
        {
            //Upload & classify
            log.Info($"ProcessEngine-: Classifying... ");
            ImageClassification imageClassification = PostDataReturnClassifier(configData.ClassificationServiceUrl, memStream, fileItem, null);

            if (imageClassification == null)
            {
                // The classifier reports failures by returning null; move on to the next file.
                log.Info("ProcessEngine-: No classification returned, skipping " + fileItem.FileName);
                continue;
            }

            //Update list entry
            log.Info($"ProcessEngine-: Updating Metadata... ");
            TagFile(fileItem,imageClassification);
        }
    }
    log.Info($"ProcessEngine-End: {DateTime.Now} ");
}


/// <summary>
/// Timer-triggered entry point: stores the supplied logger on the shared
/// configuration, then runs one processing pass, logging start and end.
/// </summary>
/// <param name="myTimer">Timer information passed by the trigger; not used here.</param>
/// <param name="log">Logger handed to the rest of the script via <c>ConfigData</c>.</param>
public static void Run(TimerInfo myTimer, TraceWriter log)
{
    log.Info($"Run-Start: {DateTime.Now} ");

    // Every other method reads the logger from ConfigData, so publish it before doing any work.
    ConfigData.GetInstance().Logger = log;

    ProcessEngine();

    log.Info($"Run-End: {DateTime.Now} ");
}


No comments:

Post a Comment