Metadata on documents has huge advantages: for starters, search works much better, and you can implement more advanced functionality.
Disclaimer - The code is provided as is, and it is in a POC (proof-of-concept) state.
The main thing that I wanted to achieve was testing out the architectural principles.
Some of the code needs to be refactored to be dynamic (driven by configuration rather than hard-coded values) before it is used in a production setting.
I'll do a second cut of this blog with more details about all the steps that I went through to get everything up and running.
Usage: Have a timer/cron process wake up periodically and auto-tag picture files in a designated picture library. For the timer process, an Azure Function was used; it was the perfect fit for this use case. For the auto-tagging I used the visual recognition service on IBM's BlueMix platform. You send the service an image, and it uses AI to do image recognition and classify the image. After the results are returned, the classifications supplied are written back to the SharePoint image library.
IBM BlueMix Resources
- Visual Recognition
- http://www.ibm.com/watson/developercloud/visual-recognition.html
- Watson API Explorer
- http://www.ibm.com/watson/developercloud/visual-recognition/api/v3/#introduction
- Azure Functions Intro Site
- https://azure.microsoft.com/en-us/services/functions/
- Azure Functions Site
- https://functions.azure.com/try?correlationId=a4f806de-be86-4732-88c2-a01525e1cc4e
Logical Architecture
Sequence Diagram
Code from Azure Function
#r "System.Xml.Linq"
#r "Newtonsoft.Json"

// NOTE(review): the SharePoint client assemblies (Microsoft.SharePoint.Client*.dll) are not
// referenced with #r in the published listing; presumably they are deployed with the function
// - confirm before running.

using System;
using System.IO;
using System.Net;
using System.Text;
using System.Collections.Specialized;
using System.Collections.Generic;
using System.Xml;
using Newtonsoft.Json;
using SP = Microsoft.SharePoint.Client;

/// <summary>
/// Deserialization target for the JSON returned by the visual-recognition "classify" call.
/// Member names are lower-case on purpose so they bind to the JSON property names.
/// </summary>
public class ImageClassification
{
    /// <summary>Result for one submitted image.</summary>
    public class Images
    {
        /// <summary>One classifier that evaluated the image, with the classes it produced.</summary>
        public class classifierList
        {
            /// <summary>A single class label and its score.</summary>
            public class classListItem
            {
                // "class" is a C# keyword, so the JSON property is mapped explicitly.
                [JsonProperty("class")]
                public string classItem { get; set; }

                public double score { get; set; }
            }

            public string classifier_id { get; set; }

            public string name { get; set; }

            public IList<classListItem> classes { get; set; }
        }

        public IList<classifierList> classifiers { get; set; }

        public string image { get; set; }
    }

    public int custom_classes { get; set; }

    public IList<Images> images { get; set; }

    public int images_processed { get; set; }
}

/// <summary>
/// A picture-library entry that still needs to be classified.
/// </summary>
public class FileItem
{
    public int ID { get; set; }

    public string FileName { get; set; }

    public string FileGuid { get; set; }

    public string RelativeUrl { get; set; }

    public string Keywords { get; set; }

    public FileItem(int Id, string fileName, string fileGuid, string relativeUrl, string keywords)
    {
        ID = Id;
        FileName = fileName;
        FileGuid = fileGuid;
        RelativeUrl = relativeUrl;
        Keywords = keywords;
    }
}

/// <summary>
/// Holds the single shared SharePoint <c>ClientContext</c> together with the site URL and
/// credentials it was created from.
/// </summary>
public class SPContext
{
    private static SP.ClientContext clientCxt = null;

    public static string SiteUrl { get; set; }

    public static NetworkCredential SiteCredentials { get; set; }

    private SPContext()
    {
    }

    /// <summary>
    /// Stores the site URL and credentials and replaces the shared context, disposing any
    /// previously created one.
    /// </summary>
    /// <param name="siteUrl">Absolute URL of the SharePoint site.</param>
    /// <param name="siteCredentials">Credentials used for both CSOM calls and file downloads.</param>
    public static void SetInstance(string siteUrl, NetworkCredential siteCredentials)
    {
        SiteCredentials = siteCredentials;
        SiteUrl = siteUrl;

        if (clientCxt != null)
        {
            clientCxt.Dispose();
            clientCxt = null;
        }

        clientCxt = CreateContext();
    }

    /// <summary>
    /// Returns the shared context, creating it from <see cref="SiteUrl"/> and
    /// <see cref="SiteCredentials"/> on first use.
    /// </summary>
    public static SP.ClientContext GetInstance()
    {
        if (clientCxt == null)
        {
            clientCxt = CreateContext();
        }

        return clientCxt;
    }

    /// <summary>
    /// Builds the absolute URL of a file in a library that sits directly under the site.
    /// </summary>
    /// <param name="spLibraryName">URL segment of the library.</param>
    /// <param name="fileName">Leaf name of the file.</param>
    /// <returns><c>{SiteUrl}/{spLibraryName}/{fileName}</c> with exactly one slash between parts.</returns>
    internal static string GetAbsoluteFileUrl(string spLibraryName, string fileName)
    {
        // The previous implementation appended "/" twice ("site//library/file") whenever
        // SiteUrl had no trailing slash; normalise both sides of each separator instead.
        return SiteUrl.TrimEnd('/') + "/" + spLibraryName.Trim('/') + "/" + fileName;
    }

    // Creates a context for the currently configured site and credentials.
    private static SP.ClientContext CreateContext()
    {
        SP.ClientContext context = new SP.ClientContext(SiteUrl);
        context.Credentials = SiteCredentials;
        return context;
    }
}

/// <summary>
/// Process-wide settings: target library, classification service URL and the function logger.
/// </summary>
public class ConfigData
{
    public string SPLibrary { get; set; }

    public TraceWriter Logger { get; set; }

    public string ClassificationServiceUrl { get; set; }

    private static ConfigData configData = null;

    private ConfigData()
    {
    }

    /// <summary>
    /// Sets the library name and classification URL on the shared instance (creating it if needed).
    /// The logger is left untouched.
    /// </summary>
    public static void Load(string library, string classServiceUrl)
    {
        ConfigData instance = GetInstance();
        instance.ClassificationServiceUrl = classServiceUrl;
        instance.SPLibrary = library;
    }

    /// <summary>Returns the shared instance, creating an empty one on first use.</summary>
    public static ConfigData GetInstance()
    {
        if (configData == null)
        {
            configData = new ConfigData();
        }

        return configData;
    }
}

// Reads a setting from the environment, falling back to the supplied default when it is
// missing or blank.
private static string GetSetting(string name, string fallback)
{
    string value = Environment.GetEnvironmentVariable(name);
    return string.IsNullOrWhiteSpace(value) ? fallback : value;
}

// Returns the first class of the first classifier of the first image, or null when any
// level of the result is missing or empty.
private static string GetTopClass(ImageClassification imgClassification)
{
    if (imgClassification == null || imgClassification.images == null || imgClassification.images.Count == 0)
    {
        return null;
    }

    IList<ImageClassification.Images.classifierList> classifiers = imgClassification.images[0].classifiers;
    if (classifiers == null || classifiers.Count == 0)
    {
        return null;
    }

    IList<ImageClassification.Images.classifierList.classListItem> classes = classifiers[0].classes;
    if (classes == null || classes.Count == 0)
    {
        return null;
    }

    return classes[0].classItem;
}

/// <summary>
/// Writes the first returned class into the "Keywords" field of the list item.
/// Does nothing (beyond logging) when the classification contains no class.
/// </summary>
/// <param name="fileItem">Item to update; only its ID and file name are used.</param>
/// <param name="imgClassification">Service result; may be null or empty.</param>
private static void TagFile(FileItem fileItem, ImageClassification imgClassification)
{
    ConfigData configData = ConfigData.GetInstance();
    TraceWriter log = configData.Logger;

    log.Info("TagFile-Start:");
    log.Info("TagFile-File:" + fileItem.ID + " / " + fileItem.FileName);

    // TODO: evaluate all classifications and drop those whose score is below a threshold.
    string topClass = GetTopClass(imgClassification);
    if (string.IsNullOrEmpty(topClass))
    {
        log.Warning("TagFile-No classification returned for " + fileItem.FileName + "; item left untagged.");
        return;
    }

    log.Info("TagFile-Classification:" + topClass);

    SP.ClientContext clientContext = SPContext.GetInstance();
    SP.List oList = clientContext.Web.Lists.GetByTitle(configData.SPLibrary);
    SP.ListItem oListItem = oList.GetItemById(fileItem.ID);

    oListItem["Keywords"] = topClass;
    oListItem.Update();
    clientContext.ExecuteQuery();

    log.Info("TagFile-End: ");
}

/// <summary>
/// Posts the image as a multipart/form-data upload and deserializes the JSON response.
/// Multipart layout based on:
/// http://stackoverflow.com/questions/566462/upload-files-with-httpwebrequest-multipart-form-data
/// http://stackoverflow.com/questions/1688855/httpwebrequest-c-sharp-uploading-a-file
/// </summary>
/// <param name="webUrl">Classification endpoint, including any query-string parameters.</param>
/// <param name="memBuffer">Image bytes to upload; the stream is read but not disposed here.</param>
/// <param name="fileItem">Supplies the file name placed in the part header.</param>
/// <param name="formFields">Currently ignored; kept so existing callers keep compiling.</param>
/// <returns>The deserialized result, or null when the request or deserialization fails.</returns>
private static ImageClassification PostDataReturnClassifier(string webUrl, MemoryStream memBuffer, FileItem fileItem, NameValueCollection formFields = null)
{
    ConfigData config = ConfigData.GetInstance();
    TraceWriter log = config.Logger;
    log.Info("PostDataReturnClassifier-Start: ");

    string boundary = "----------------------------" + DateTime.Now.Ticks.ToString("x");

    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(webUrl);
    request.ContentType = "multipart/form-data; boundary=" + boundary;
    request.Method = "POST";
    request.ServicePoint.Expect100Continue = false;

    // The opening delimiter carries no leading CRLF because it is the first thing in the body.
    byte[] startBoundaryBytes = Encoding.ASCII.GetBytes("--" + boundary + "\r\n");
    byte[] endBoundaryBytes = Encoding.ASCII.GetBytes("\r\n--" + boundary + "--");
    string header = string.Format(
        "Content-Disposition: form-data; name=\"{0}\"; filename=\"{1}\"\r\n" + "Content-Type: image/jpeg\r\n\r\n",
        "uplTheFile",
        fileItem.FileName);
    byte[] headerBytes = Encoding.UTF8.GetBytes(header);
    byte[] imageBytes = memBuffer.ToArray();

    try
    {
        // Total length is known up front, so the parts are written straight to the request
        // stream instead of being staged in a second in-memory copy.
        request.ContentLength = (long)startBoundaryBytes.Length + headerBytes.Length + imageBytes.Length + endBoundaryBytes.Length;

        using (Stream requestStream = request.GetRequestStream())
        {
            requestStream.Write(startBoundaryBytes, 0, startBoundaryBytes.Length);
            requestStream.Write(headerBytes, 0, headerBytes.Length);
            requestStream.Write(imageBytes, 0, imageBytes.Length);
            requestStream.Write(endBoundaryBytes, 0, endBoundaryBytes.Length);
        }

        log.Info("PostDataReturnClassifier-Posting to Service: ");

        using (WebResponse response = request.GetResponse())
        using (Stream streamRes = response.GetResponseStream())
        using (StreamReader readResult = new StreamReader(streamRes))
        {
            JsonSerializer serializer = new JsonSerializer();
            ImageClassification imgClass = (ImageClassification)serializer.Deserialize(readResult, typeof(ImageClassification));

            // Logged via the null-safe helper so an empty result is returned to the caller
            // rather than being turned into a failure by the log statement itself.
            log.Info("PostDataReturnClassifier-Result: " + (GetTopClass(imgClass) ?? "(no classes returned)"));
            return imgClass;
        }
    }
    catch (Exception e)
    {
        // Deliberate best-effort: callers treat null as "could not classify this file".
        log.Error("PostDataReturnClassifier-Error: " + e.Message, e);
        return null;
    }
}

/// <summary>
/// Downloads a file from the configured library into memory.
/// </summary>
/// <param name="workItem">Item whose <c>FileName</c> identifies the file to fetch.</param>
/// <returns>A stream positioned at 0 that the caller owns and should dispose.</returns>
private static MemoryStream DownloadItem(FileItem workItem)
{
    ConfigData config = ConfigData.GetInstance();
    TraceWriter log = config.Logger;
    log.Info("DownloadItem-Start: ");

    string webFileUrl = SPContext.GetAbsoluteFileUrl(config.SPLibrary, workItem.FileName);
    log.Info("DownloadItem-File: " + webFileUrl);

    WebRequest request = WebRequest.Create(webFileUrl);
    request.Credentials = SPContext.SiteCredentials;
    request.Timeout = 30000; // TODO: this should come from a config setting

    MemoryStream memStream = new MemoryStream();
    try
    {
        log.Info("DownloadItem-Starting Downloaded ");
        using (WebResponse response = request.GetResponse())
        using (Stream dataStream = response.GetResponseStream())
        {
            log.Info("DownloadItem-Getting Downloaded ");
            dataStream.CopyTo(memStream);
        }
    }
    catch
    {
        // The caller never receives the stream on failure, so release it here.
        memStream.Dispose();
        throw;
    }

    log.Info("DownloadItem-Downloaded bytes:" + memStream.Length);
    memStream.Position = 0; // rewind so the caller reads from the start
    return memStream;
}

/// <summary>
/// Collects the library items whose "Keywords" field is missing or blank.
/// </summary>
/// <returns>The items to classify; empty when nothing needs tagging.</returns>
private static List<FileItem> GetWorkItems()
{
    ConfigData config = ConfigData.GetInstance();
    TraceWriter log = config.Logger;

    SP.ClientContext clientContext = SPContext.GetInstance();
    SP.List oList = clientContext.Web.Lists.GetByTitle(config.SPLibrary);

    SP.CamlQuery camlQuery = new SP.CamlQuery();
    // NOTE(review): the published listing lost its XML markup here (only "" and a stray
    // "100" survived); a 100-row page limit is the presumed original - confirm.
    camlQuery.ViewXml = "<View><RowLimit>100</RowLimit></View>";

    List<FileItem> workList = new List<FileItem>();

    // Walk every page so untagged items beyond the first page are not skipped.
    do
    {
        SP.ListItemCollection collListItem = oList.GetItems(camlQuery);
        clientContext.Load(collListItem);
        clientContext.ExecuteQuery();

        foreach (SP.ListItem oListItem in collListItem)
        {
            log.Info("ID: " + oListItem.Id + " \nFile Name: " + oListItem.FieldValues["FileLeafRef"] + " \nGUID: " + oListItem.FieldValues["GUID"]);

            // Process items that don't have the keywords set. TryGetValue avoids a
            // KeyNotFoundException when the field was not returned for the item.
            object existingKeywords;
            bool hasKeywords = oListItem.FieldValues.TryGetValue("Keywords", out existingKeywords)
                && existingKeywords != null
                && !string.IsNullOrWhiteSpace(existingKeywords.ToString());

            if (!hasKeywords)
            {
                workList.Add(new FileItem(
                    oListItem.Id,
                    oListItem.FieldValues["FileLeafRef"].ToString(),
                    oListItem.FieldValues["GUID"].ToString(),
                    oListItem.FieldValues["FileDirRef"].ToString(),
                    string.Empty));
            }
        }

        camlQuery.ListItemCollectionPosition = collListItem.ListItemCollectionPosition;
    }
    while (camlQuery.ListItemCollectionPosition != null);

    return workList;
}

/// <summary>
/// Loads configuration, connects to SharePoint, then downloads, classifies and tags every
/// untagged picture. A failure on one file is logged and the run continues with the next.
/// </summary>
public static void ProcessEngine()
{
    TraceWriter log = ConfigData.GetInstance().Logger;
    log.Info($"ProcessEngine-Enter: {DateTime.Now} ");

    // Register with IBM BlueMix to get the URL and api key for the visual-recognition service.
    // Values are read from environment variables so secrets need not live in source; the
    // literals below are only the original sample placeholders.
    string watsonImgRecUrl = GetSetting(
        "WatsonClassifyUrl",
        "https://gateway-EX.watsonplatform.net/visual-recognition/api/v3/classify?api_key=123456789&version=2016-05-20");

    log.Info("ProcessEngine-: Loading Config data ");
    ConfigData.Load(GetSetting("SPLibrary", "pics"), watsonImgRecUrl);
    ConfigData configData = ConfigData.GetInstance();

    log.Info("ProcessEngine-: Setting SPContext Instance ");
    // This is for SharePoint on-premise that uses IWA for authentication
    // (other authentication schemes to be added later).
    SPContext.SetInstance(
        GetSetting("SPSiteUrl", "http://SomeSharePointOnPremise.sample.com/site/SiteEx"),
        new NetworkCredential(
            GetSetting("SPUserName", "MyUserName"),
            GetSetting("SPPassword", "MyPassword"),
            GetSetting("SPDomain", "MyDomain")));

    log.Info("ProcessEngine-: Getting Work Items ");
    foreach (FileItem fileItem in GetWorkItems())
    {
        log.Info("ProcessEngine-: Processing Work Item ... " + fileItem.FileName + " / " + fileItem.FileGuid);

        try
        {
            log.Info("ProcessEngine-: Downloading... ");
            using (MemoryStream memStream = DownloadItem(fileItem))
            {
                log.Info("ProcessEngine-: Classifying... ");
                ImageClassification imageClassification = PostDataReturnClassifier(configData.ClassificationServiceUrl, memStream, fileItem, null);

                if (imageClassification == null)
                {
                    // PostDataReturnClassifier already logged the cause.
                    log.Warning("ProcessEngine-: Skipping " + fileItem.FileName + " (classification failed).");
                    continue;
                }

                log.Info("ProcessEngine-: Updating Metadata... ");
                TagFile(fileItem, imageClassification);
            }
        }
        catch (WebException ex)
        {
            // A download problem with one file should not abort the remaining items.
            log.Error("ProcessEngine-: Failed to process " + fileItem.FileName + ": " + ex.Message, ex);
        }
    }

    log.Info($"ProcessEngine-End: {DateTime.Now} ");
}

/// <summary>
/// Timer-triggered entry point: registers the logger on the shared configuration and runs
/// one processing pass.
/// </summary>
public static void Run(TimerInfo myTimer, TraceWriter log)
{
    log.Info($"Run-Start: {DateTime.Now} ");

    ConfigData configData = ConfigData.GetInstance();
    configData.Logger = log;

    ProcessEngine();

    log.Info($"Run-End: {DateTime.Now} ");
}
No comments:
Post a Comment