There are already a few options for wrapping calls to Elastic Search (such as ElasticSearch.Net and NEST); however, I wanted to create an object-oriented, strongly typed interface for some simple queries (with a view to also being able to convert to this from a CDSA "where clause").
My particular implementation of Elastic search, Azure Search, can be called using a RESTful API with OData and Lucene syntax. My idea was that any OO representation of a query can eventually be boiled down to a URL with a query string (similar to how CDSA converts OO clauses to SQL).
My first step was to create an abstraction around the concept of a "filter" that can be applied to a search:
/// <summary>
/// Base type for anything that can be rendered as a fragment of a search query.
/// </summary>
public abstract class Filter
{
    /// <summary>
    /// Renders this filter as its textual query fragment.
    /// </summary>
    /// <returns>The query text produced by the concrete filter.</returns>
    public abstract string GetQueryString();
}
Since we are most likely going to be dealing with collections of filters, I created an extension to provide easy access to this method:
/// <summary>
/// Convenience helpers for working with sequences of <see cref="Filter"/>.
/// </summary>
internal static class FilterExtensions
{
    /// <summary>
    /// Renders every filter in the sequence and joins the fragments with " and ".
    /// </summary>
    /// <param name="filters">The filters to combine.</param>
    /// <returns>A single expression in which all filters must match.</returns>
    public static string AsQueryString(this IEnumerable<Filter> filters)
    {
        IEnumerable<string> fragments =
            from filter in filters
            select filter.GetQueryString();

        return string.Join(" and ", fragments);
    }
}
The first concrete implementation is one that Azure Search always requires - the API version you intend to work with:
/// <summary>
/// Filter that renders the fixed "api-version" query parameter.
/// </summary>
internal sealed class ApiVersionFilter : Filter
{
    /// <summary>
    /// Returns the api-version parameter as a name=value pair.
    /// </summary>
    /// <returns>The literal text "api-version=2016-09-01".</returns>
    public override string GetQueryString() => "api-version=2016-09-01";
}
Most fields you are querying will either be a filterable scalar value, such as a string or a number, or a collection of these values held in a collection column. I represented these two types of query using two classes:
/// <summary>
/// Filter that compares one field against each of a set of values, OR-ing the comparisons together.
/// </summary>
public class FieldValuesFilter : Filter
{
    private IEnumerable<IFieldValue> candidates;
    private string comparison;

    /// <summary>
    /// Creates a filter that uses the "eq" operator.
    /// </summary>
    /// <param name="fieldName">Name of the field to compare.</param>
    /// <param name="selectedValues">Values the field is compared against.</param>
    public FieldValuesFilter(string fieldName, IEnumerable<IFieldValue> selectedValues)
        : this(fieldName, "eq", selectedValues)
    {
    }

    /// <summary>
    /// Creates a filter with an explicit comparison operator.
    /// </summary>
    /// <param name="fieldName">Name of the field to compare.</param>
    /// <param name="operator">Operator text placed between the field name and each value.</param>
    /// <param name="selectedValues">Values the field is compared against.</param>
    public FieldValuesFilter(string fieldName, string @operator, IEnumerable<IFieldValue> selectedValues)
    {
        this.candidates = selectedValues;
        this.comparison = @operator;
        this.FieldName = fieldName;
    }

    /// <summary>Gets the name of the field this filter applies to.</summary>
    public string FieldName { get; private set; }

    /// <summary>
    /// Renders one "field operator value" term per value, joined with " or " and wrapped in parentheses.
    /// </summary>
    /// <returns>The parenthesised expression.</returns>
    public override string GetQueryString()
    {
        IEnumerable<string> terms =
            from candidate in this.candidates
            select $"{this.FieldName} {this.comparison} {candidate.GetFormattedValue()}";

        string joined = string.Join(" or ", terms);
        return string.Concat("(", joined, ")");
    }
}
/// <summary>
/// Filter over a collection field: renders a lambda expression per value and ORs them together.
/// </summary>
public class ArrayValueFilter : Filter
{
    private IEnumerable<IFieldValue> candidates;
    private string quantifier;

    /// <summary>
    /// Creates a filter that uses the "any" operator.
    /// </summary>
    /// <param name="fieldName">Name of the collection field.</param>
    /// <param name="selectedValues">Values to look for in the collection.</param>
    public ArrayValueFilter(string fieldName, IEnumerable<IFieldValue> selectedValues)
        : this(fieldName, "any", selectedValues)
    {
    }

    /// <summary>
    /// Creates a filter with an explicit collection operator.
    /// </summary>
    /// <param name="fieldName">Name of the collection field.</param>
    /// <param name="operator">Operator text placed after the field name and a slash.</param>
    /// <param name="selectedValues">Values to look for in the collection.</param>
    public ArrayValueFilter(string fieldName, string @operator, IEnumerable<IFieldValue> selectedValues)
    {
        this.candidates = selectedValues;
        this.quantifier = @operator;
        this.FieldName = fieldName;
    }

    /// <summary>Gets the name of the field this filter applies to.</summary>
    public string FieldName { get; private set; }

    /// <summary>
    /// Renders one "field/operator(t: t eq value)" term per value, joined with " or " and wrapped in parentheses.
    /// </summary>
    /// <returns>The parenthesised expression.</returns>
    public override string GetQueryString()
    {
        IEnumerable<string> terms =
            from candidate in this.candidates
            select $"{this.FieldName}/{this.quantifier}(t: t eq {candidate.GetFormattedValue()})";

        string joined = string.Join(" or ", terms);
        return string.Concat("(", joined, ")");
    }
}
You will notice that, to abstract the field type, I use an IFieldValue interface for passing in the filter values. This is because the formatting changes depending on whether the data type is a string or a number.
The interface and the two implementing classes are below:
/// <summary>
/// A value that can be used inside a filter expression.
/// </summary>
public interface IFieldValue
{
    /// <summary>Gets the underlying value.</summary>
    /// <returns>The value as an object.</returns>
    object GetValue();

    /// <summary>Gets the value as the text to embed in a filter expression.</summary>
    /// <returns>The formatted text.</returns>
    string GetFormattedValue();
}
/// <summary>
/// A string value, rendered as a single-quoted literal in filter expressions.
/// </summary>
public class StringFieldValue : IFieldValue
{
    private readonly string value;

    /// <summary>
    /// Wraps the supplied string.
    /// </summary>
    /// <param name="value">The raw (unquoted, unescaped) string.</param>
    public StringFieldValue(string value)
    {
        this.value = value;
    }

    /// <summary>
    /// Returns the value wrapped in single quotes, with any embedded single quote doubled
    /// so that it cannot terminate the literal early.
    /// </summary>
    /// <returns>The quoted literal; a null value renders as an empty literal ('').</returns>
    public string GetFormattedValue()
    {
        // OData string literals escape a single quote by doubling it.
        string escaped = (this.value ?? string.Empty).Replace("'", "''");
        return $"'{escaped}'";
    }

    /// <summary>Gets the raw string exactly as supplied to the constructor.</summary>
    /// <returns>The unescaped string.</returns>
    public object GetValue()
    {
        return this.value;
    }
}
/// <summary>
/// An integer value, rendered as a bare numeric literal in filter expressions.
/// </summary>
public class IntegerFieldValue : IFieldValue
{
    private readonly int value;

    /// <summary>
    /// Wraps the supplied integer.
    /// </summary>
    /// <param name="value">The integer value.</param>
    public IntegerFieldValue(int value)
    {
        this.value = value;
    }

    /// <summary>
    /// Returns the integer as text, formatted with the invariant culture so the
    /// output does not vary with the current thread culture (e.g. its negative sign).
    /// </summary>
    /// <returns>The integer as invariant-culture text.</returns>
    public string GetFormattedValue()
    {
        return this.value.ToString(System.Globalization.CultureInfo.InvariantCulture);
    }

    /// <summary>Gets the integer as a boxed object.</summary>
    /// <returns>The boxed integer.</returns>
    public object GetValue()
    {
        return this.value;
    }
}
You can create other filters, such as a "well known text" or "proximity" filter to query spatial data:
/// <summary>
/// Filter that renders a geo.intersects expression for a well-known-text geography.
/// </summary>
public class WktFilter : Filter
{
    /// <summary>
    /// Creates the filter for the supplied well-known text.
    /// </summary>
    /// <param name="wkt">The well-known-text string to embed in the expression.</param>
    public WktFilter(string wkt)
    {
        Wkt = wkt;
    }

    /// <summary>Gets or sets the well-known-text string embedded in the expression.</summary>
    public string Wkt { get; set; }

    /// <summary>
    /// Renders the geo.intersects call against the "location" field.
    /// </summary>
    /// <returns>The filter expression.</returns>
    public override string GetQueryString() =>
        $"geo.intersects(location, geography'{this.Wkt}')";
}
/// <summary>
/// Filter that renders a geo.distance expression limiting results to a radius around a point.
/// </summary>
public class ProximityFilter : Filter
{
    private readonly double latitude;
    private readonly double longitude;
    private readonly double radiusInMeters;

    /// <summary>
    /// Creates the filter for a centre point and radius.
    /// </summary>
    /// <param name="latitude">Latitude of the centre point.</param>
    /// <param name="longitude">Longitude of the centre point.</param>
    /// <param name="radiusInMeters">Radius in metres; converted to kilometres when rendered.</param>
    public ProximityFilter(double latitude, double longitude, double radiusInMeters)
    {
        this.latitude = latitude;
        this.longitude = longitude;
        this.radiusInMeters = radiusInMeters;
    }

    /// <summary>
    /// Renders the geo.distance comparison against the "location" field.
    /// Numbers are formatted with the invariant culture so that a comma-decimal
    /// thread culture cannot corrupt the expression.
    /// </summary>
    /// <returns>The filter expression.</returns>
    public override string GetQueryString()
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;

        string lon = this.longitude.ToString(invariant);
        string lat = this.latitude.ToString(invariant);

        // Azure Search works with KM not M, so div by 1000
        string radiusInKm = (this.radiusInMeters / 1000d).ToString(invariant);

        // POINT takes longitude first, then latitude
        return $"geo.distance(location, geography'POINT({lon} {lat})') le {radiusInKm}";
    }
}
Now that we have the ability to create filters using C# classes (and more derivatives can be added if you want more strong typing), we need a way of converting them to an Azure Search query. For simplicity I again started with a base class representing a given service, which encapsulates the ability to convert a set of filters into a query string or POST body and holds the config and endpoint for the index:
/// <summary>
/// Base class for a search service: holds the service configuration and turns a
/// search term, filters and paging values into either a GET query string or a POST body.
/// </summary>
public abstract class SearchServiceBase
{
    private readonly AzureServiceConfig config;

    /// <summary>
    /// Stores the configuration used by this service.
    /// </summary>
    /// <param name="config">The service configuration.</param>
    public SearchServiceBase(AzureServiceConfig config)
    {
        this.config = config;
    }

    /// <summary>Gets the relative API path that requests are sent to.</summary>
    public abstract string Api { get; }

    /// <summary>Gets the configuration supplied at construction.</summary>
    public AzureServiceConfig Config
    {
        get
        {
            return this.config;
        }
    }

    /// <summary>
    /// Builds the query string (including the leading "?") for a GET request.
    /// </summary>
    /// <param name="name">Search text; omitted when null or empty.</param>
    /// <param name="filters">Filters to combine into $filter; may be null or empty.</param>
    /// <param name="top">Value for $top; omitted when null.</param>
    /// <param name="skip">Value for $skip; omitted when null. A value of 0 also adds $count=true.</param>
    /// <returns>The query string; it always contains at least the api-version parameter.</returns>
    protected string GetQueryString(string name, IEnumerable<Filter> filters, int? top, int? skip)
    {
        var invariant = System.Globalization.CultureInfo.InvariantCulture;
        var parameters = new List<string>();

        // name param - escaped so characters such as '&', '#' or '+' cannot break the query string
        if (!string.IsNullOrEmpty(name))
        {
            parameters.Add("search=" + System.Uri.EscapeDataString(name));
        }

        // add API version by default
        parameters.Add(new ApiVersionFilter().GetQueryString());

        if (top.HasValue)
        {
            parameters.Add("$top=" + top.Value.ToString(invariant));
        }

        if (skip.HasValue)
        {
            parameters.Add("$skip=" + skip.Value.ToString(invariant));
        }

        if (skip == 0)
        {
            parameters.Add("$count=true");
        }

        // filters could be none, one or many; render once so the sequence is enumerated a single time
        string filterExpression = filters == null ? null : filters.AsQueryString();
        if (!string.IsNullOrEmpty(filterExpression))
        {
            parameters.Add("$filter=" + System.Uri.EscapeDataString(filterExpression));
        }

        return "?" + string.Join("&", parameters);
    }

    /// <summary>
    /// Builds the query string for a POST request, which carries only the api-version.
    /// </summary>
    /// <returns>The query string including the leading "?".</returns>
    protected string GetPostQueryString()
    {
        return "?" + new ApiVersionFilter().GetQueryString();
    }

    /// <summary>
    /// Builds the JSON body for a POST request.
    /// </summary>
    /// <param name="name">Search text; when present, "searchMode" is also written as "all".</param>
    /// <param name="filters">Filters to combine into the "filter" property; may be null or empty.</param>
    /// <param name="top">Value for "top"; omitted when null.</param>
    /// <param name="skip">Value for "skip"; omitted when null. A value of 0 also writes "count": true.</param>
    /// <returns>The serialised JSON object.</returns>
    protected string GetPostBody(string name, IEnumerable<Filter> filters, int? top, int? skip)
    {
        using (var sw = new StringWriter(System.Globalization.CultureInfo.InvariantCulture))
        using (var writer = new JsonTextWriter(sw))
        {
            // {
            writer.WriteStartObject();

            if (skip == 0)
            {
                // written as a JSON boolean rather than the string "true"
                writer.WritePropertyName("count");
                writer.WriteValue(true);
            }

            if (!string.IsNullOrEmpty(name))
            {
                writer.WritePropertyName("search");
                writer.WriteValue(name);
                writer.WritePropertyName("searchMode");
                writer.WriteValue("all");
            }

            if (top.HasValue)
            {
                // written as a JSON number rather than a string
                writer.WritePropertyName("top");
                writer.WriteValue(top.Value);
            }

            if (skip.HasValue)
            {
                writer.WritePropertyName("skip");
                writer.WriteValue(skip.Value);
            }

            // filters could be none, one or many; the expression text is the same as for GET
            string filterExpression = filters == null ? null : filters.AsQueryString();
            if (!string.IsNullOrEmpty(filterExpression))
            {
                writer.WritePropertyName("filter");
                writer.WriteValue(filterExpression);
            }

            // }
            writer.WriteEndObject();
            writer.Flush();

            return sw.ToString();
        }
    }
}
So an example subclass which consumes this functionality:
/// <summary>
/// Example service that queries a single index and pages through all matching documents.
/// </summary>
public class ExampleSearchService : SearchServiceBase
{
    // number of documents requested per page
    private const int PageSize = 1000;

    private readonly string indexName;

    /// <summary>
    /// Creates a service for the given index.
    /// </summary>
    /// <param name="config">The service configuration, passed to the base class.</param>
    /// <param name="indexName">Name of the index inserted into the API path.</param>
    public ExampleSearchService(AzureServiceConfig config, string indexName)
        : base(config)
    {
        this.indexName = indexName;
    }

    /// <summary>Gets the relative path of the search endpoint for the configured index.</summary>
    public override string Api
    {
        get
        {
            return $"indexes/{this.indexName}/docs/search";
        }
    }

    /// <summary>
    /// Runs the search and retrieves every page of results.
    /// </summary>
    /// <param name="name">Search text; may be null or empty.</param>
    /// <param name="filters">Filters to apply; may be null or empty.</param>
    /// <param name="maxResultCount">Upper bound on the reported result count; values of 0 or less disable the check.</param>
    /// <param name="usePost">When true the query is sent as a POST body, otherwise as a GET query string.</param>
    /// <returns>The deserialised results from every successfully retrieved page.</returns>
    /// <exception cref="InvalidOperationException">The reported result count exceeds <paramref name="maxResultCount"/>.</exception>
    public async Task<IEnumerable<ExampleSearchResult>> GetResultsAsync(string name, IEnumerable<Filter> filters, int maxResultCount, bool usePost = false)
    {
        using (var webApiClient = new AzureElasticSearchClient(this.Config))
        {
            webApiClient.Timeout = new TimeSpan(0, 20, 0);

            var results = new List<ExampleSearchResult>();
            int pagesToRetrieve = 1;
            int pagesRetrieved = 0;

            // at least one page, but may be more..
            while (pagesRetrieved < pagesToRetrieve)
            {
                int skip = pagesRetrieved * PageSize;

                using (HttpResponseMessage response = await this.RequestPageAsync(webApiClient, name, filters, skip, usePost).ConfigureAwait(false))
                {
                    pagesRetrieved++;

                    // NOTE(review): an unsuccessful page is skipped without any error, as in the
                    // original code - confirm whether callers would prefer an exception here.
                    if (!response.IsSuccessStatusCode)
                    {
                        continue;
                    }

                    // parse the JSON response
                    var jResponse = JObject.Parse(await response.Content.ReadAsStringAsync().ConfigureAwait(false));

                    // the first page carries the total count, which determines how many pages to fetch
                    if (pagesRetrieved == 1)
                    {
                        int count = (int)jResponse.GetValue("@odata.count");

                        // check against max result count and throw exception if over that
                        if (maxResultCount > 0 && count > maxResultCount)
                        {
                            throw new InvalidOperationException($"Search result count of {count} was greater than the maximum of {maxResultCount}");
                        }

                        pagesToRetrieve = (int)Math.Ceiling((double)count / (double)PageSize);
                    }

                    // now get the value, which is the array of results
                    JArray jsonResults = (JArray)jResponse.GetValue("value");

                    // loop over the JSON array and deserialise each result object
                    foreach (var resultData in jsonResults)
                    {
                        results.Add(resultData.ToObject<ExampleSearchResult>());
                    }
                }
            }

            return results;
        }
    }

    // Sends the request for one page, as either a POST with a JSON body or a GET with a query string.
    private async Task<HttpResponseMessage> RequestPageAsync(AzureElasticSearchClient client, string name, IEnumerable<Filter> filters, int skip, bool usePost)
    {
        if (usePost)
        {
            string postUrl = $"{this.Api}{this.GetPostQueryString()}";
            string requestBody = this.GetPostBody(name, filters, PageSize, skip);

            using (HttpContent content = new StringContent(requestBody, System.Text.Encoding.UTF8, "application/json"))
            {
                return await client.PostAsync(postUrl, content).ConfigureAwait(false);
            }
        }

        // build the query url from the filters
        string getUrl = $"{this.Api}{this.GetQueryString(name, filters, PageSize, skip)}";
        return await client.GetAsync(getUrl).ConfigureAwait(false);
    }
}
You will notice I created a wrapper for HttpClient; this is simply to encapsulate adding the base address and API key:
/// <summary>
/// HttpClient that is pre-configured with the search service base address and API key header.
/// </summary>
internal class AzureElasticSearchClient : HttpClient
{
    /// <summary>
    /// Configures the base address and the "api-key" default header from the supplied configuration.
    /// </summary>
    /// <param name="config">Provides the search service name and API key.</param>
    public AzureElasticSearchClient(AzureServiceConfig config)
    {
        var serviceAddress = new Uri($"https://{config.AzureSearchServiceName}.search.windows.net/");

        BaseAddress = serviceAddress;
        DefaultRequestHeaders.Add("api-key", config.AzureSearchApiKey);
    }
}
Here are some examples of how to create and add filters then call the example search service:
/// <summary>
/// Example tests showing how to build filters and call <see cref="ExampleSearchService"/>.
/// </summary>
[TestFixture]
public class TestExampleSearchService
{
    // 0 (or any value not greater than 0) disables the result-count check in GetResultsAsync
    private const int NoResultLimit = 0;

    // NOTE(review): ExampleSearchService as shown only declares a (config, indexName) constructor,
    // so this parameterless call needs the real configuration and index name supplied - TODO confirm.
    private ExampleSearchService sut = new ExampleSearchService();

    /// <summary>Searches inside a polygon, combined with an integer field filter.</summary>
    [Test]
    public async Task TestGetResultsInWktWithAdditionalFilter_Elastic()
    {
        // arrange
        var testPolygonWkt = "POLYGON ((-1.6259765625 53.74404116282134, -1.6005706787109375 53.76089000834015, -1.5696716308593748 53.73876182109416, -1.6036605834960935 53.72799803200196, -1.6259765625 53.74404116282134))";
        var polyFilter = new WktFilter(testPolygonWkt);
        var examplePropertyFilter = new FieldValuesFilter("SomeIntProperty", new IntegerFieldValue[] { new IntegerFieldValue(1) });

        // act
        var stopwatch = Stopwatch.StartNew();
        var result = await this.sut.GetResultsAsync(string.Empty, new Filters.Filter[] { polyFilter, examplePropertyFilter }, NoResultLimit);
        stopwatch.Stop();
        int recordCount = result.Count();
        Trace.WriteLine($"Time taken: {stopwatch.Elapsed.TotalMilliseconds} ms");
        Trace.WriteLine($"# recs {recordCount}");

        // assert
        Assert.Greater(recordCount, 0);
    }

    /// <summary>Searches by name within a radius of a point.</summary>
    [Test]
    public async Task TestGetResultsInProximityWithName_Elastic()
    {
        // arrange
        var longi = -1.6259765625;
        var lati = 53.74404116282134;
        var dist = 200;
        var proxFilter = new ProximityFilter(lati, longi, dist);

        // act
        var stopwatch = Stopwatch.StartNew();
        var result = await this.sut.GetResultsAsync("example name", new Filters.Filter[] { proxFilter }, NoResultLimit);
        stopwatch.Stop();
        int recordCount = result.Count();
        Trace.WriteLine($"# recs {recordCount}");
        Trace.WriteLine($"Time taken: {stopwatch.Elapsed.TotalMilliseconds} ms");

        // assert
        Assert.Greater(recordCount, 0);
    }

    /// <summary>Searches a collection field for any of a set of string values.</summary>
    [Test]
    public async Task TestGetResultsInArrayFilter_Elastic()
    {
        // arrange
        var possibleValues = new[] { "hello", "world" }.Select(s => new StringFieldValue(s));

        // a collection field is matched with ArrayValueFilter, not with a scalar comparison
        var exampleArrayPropertyFilter = new ArrayValueFilter("SomeStringArrayProperty", possibleValues);

        // act
        var stopwatch = Stopwatch.StartNew();
        var result = await this.sut.GetResultsAsync(string.Empty, new Filters.Filter[] { exampleArrayPropertyFilter }, NoResultLimit);
        stopwatch.Stop();
        int recordCount = result.Count();
        Trace.WriteLine($"Time taken: {stopwatch.Elapsed.TotalMilliseconds} ms");
        Trace.WriteLine($"# recs {recordCount}");

        // assert
        Assert.Greater(recordCount, 0);
    }
}
With this structure all now in place, I wanted to plug this into an existing CDSA application which was currently using SQL for performing queries. I therefore needed a way to convert from traditional "CDSA WhereClause" objects to my new Azure Search filter structure. I created a basic implementation, which isn't 100% compatible with all clauses yet, but for most use cases it works fine:
/// <summary>
/// Converts a CDSA <c>WhereClause</c> into a flat list of <see cref="FieldValuesFilter"/> objects.
/// Only clauses joined with the AND conjunction are supported.
/// </summary>
internal class WhereClauseParser
{
    /// <summary>
    /// Parses a where clause, including its sub groups.
    /// </summary>
    /// <param name="clause">The clause to convert; may be null.</param>
    /// <returns>
    /// The filters for the clause, or null when the clause is null or its recursive clause list is empty.
    /// </returns>
    /// <exception cref="NotImplementedException">A conjunction other than AND, or an unsupported operator, was found.</exception>
    public IEnumerable<FieldValuesFilter> ParseWhereClause(WhereClause clause)
    {
        if (clause == null || clause.RecursiveClauseList.Count == 0)
        {
            return null;
        }

        return this.ParseWhereClauseWithSubgroups(clause);
    }

    // Converts the clause's own elements, then recurses into each sub group and appends its filters.
    private IEnumerable<FieldValuesFilter> ParseWhereClauseWithSubgroups(WhereClause clause)
    {
        if (clause.ConjunctionOperator != ConjunctionOperator.And)
        {
            throw new NotImplementedException("Elastic search clause parser currently only supports the 'AND' conjunction.");
        }

        var myFilters = new List<FieldValuesFilter>();

        foreach (var whereClauseElement in clause.ClauseList)
        {
            myFilters.Add(this.ParseWhereClauseElement(whereClauseElement));
        }

        foreach (var subClause in clause.SubGroups)
        {
            myFilters.AddRange(this.ParseWhereClauseWithSubgroups(subClause));
        }

        return myFilters;
    }

    // Maps one clause element (field, operator, value) to a FieldValuesFilter.
    private FieldValuesFilter ParseWhereClauseElement(WhereClauseElement whereClauseElement)
    {
        // start with the defaults
        var fieldName = whereClauseElement.CompareItem;
        var values = new object[] { whereClauseElement.CompareValue };
        var @operator = string.Empty;

        // don't need qualified paths, remove the dots
        fieldName = fieldName.Replace(".", "");

        switch (whereClauseElement.Operator)
        {
            case Operator.Equals:
            case Operator.Like: // todo: this should be a wildcard search, not an exact equality
                @operator = "eq";
                break;
            case Operator.NotEqual:
                @operator = "ne";
                break;
            case Operator.GreaterThan:
                @operator = "gt";
                break;
            case Operator.LessThan:
                @operator = "lt";
                break;
            case Operator.GreaterThanEqualTo:
                @operator = "ge";
                break;
            case Operator.LessThanEqualTo:
                @operator = "le";
                break;
            case Operator.In:
                // an IN list becomes one "eq" comparison per value (the filter ORs them together)
                @operator = "eq";
                string[] parts = values[0].ToString().Split(',');

                // if it was an array of numbers stored as CSV, then unpack the numbers as ints;
                // otherwise keep every part as a string.
                int[] numbers;
                if (TryParseAllAsInt(parts, out numbers))
                {
                    values = numbers.Select(n => (object)n).ToArray();
                }
                else
                {
                    values = parts;
                }

                break;
            default:
                throw new NotImplementedException("Elastic search clause parser currently does not support this operator");
        }

        // materialise once so the field values are not re-created on every enumeration
        return new FieldValuesFilter(fieldName, @operator, values.Select(v => this.FieldValueFactory(v)).ToList());
    }

    // Returns true only when every part is a run of ASCII digits that fits in an int.
    // TryParse is used instead of a regex + int.Parse so that over-long digit strings
    // or non-ASCII digits fall back to string handling rather than throwing.
    private static bool TryParseAllAsInt(string[] parts, out int[] numbers)
    {
        numbers = new int[parts.Length];

        for (int i = 0; i < parts.Length; i++)
        {
            bool parsed = int.TryParse(
                parts[i],
                System.Globalization.NumberStyles.None,
                System.Globalization.CultureInfo.InvariantCulture,
                out numbers[i]);

            if (!parsed)
            {
                return false;
            }
        }

        return true;
    }

    // Chooses the field value wrapper from the runtime type: ints stay numeric, everything else becomes a string.
    private IFieldValue FieldValueFactory(object v)
    {
        // a dirty type check, lets call it a "factory"
        if (v is int)
        {
            return new IntegerFieldValue((int)v);
        }

        return new StringFieldValue(v.ToString());
    }
}