function(doc) { var id = doc.getId(); if (id !== null) { var pattern = "https://www.mydomain.com/links/contact/?"; // 0 means the pattern was found so drop the doc return (id.indexOf(pattern) == 0) ? null : doc; } return doc;}
Format Date to Solr Date
Copy
// For example:// From: 26/Mar/2015:14:38:48 -0700// To: 2015-03-26T14:38:48Z (Solr format)function(doc) { if (doc.getId() !== null) { var inboundPattern = "dd/MMM/yyyy':'HH:mm:ss Z"; // modify this to match the format of the inbound date var solrDatePattern = "yyyy-MM-dd'T'HH:mm:ss'Z'"; // leave this alone var dateFieldName = "apachelogtime"; // change this to your date field name var solrFormatter = new java.text.SimpleDateFormat(solrDatePattern); var apacheParser = new java.text.SimpleDateFormat(inboundPattern); var dateString = doc.getFirstFieldValue(dateFieldName); logger.info("**** dateString: " + dateString); var inboundDate = apacheParser.parse(dateString); logger.info("**** inboundDate: " + inboundDate.toString()); var solrDate = solrFormatter.format(inboundDate); logger.info("**** solrDate: " + solrDate.toString()); doc.setField(dateFieldName, solrDate.toString()); } return doc;}
Replace whitespace and newlines
Copy
function(doc) { if (doc.getId() !== null) { var fields = ["col1", "col2", "col3"]; for (i = 0; i < fields.length; i++ ) { var field = fields[i]; var value = doc.getFirstFieldValue(field); logger.info("BEFORE: Field " + field + ": *" + value + "*"); if (value != null) { value = value.replace(/^\s+/, ""); // remove leading whitespace logger.info("AFTER: Field " + field + ": *" + value + "*"); value = value.replace(/\s+$/, ""); // remove trailing whitespace logger.info("AFTER: Field " + field + ": *" + value + "*"); value = value.replace(/\s+/g, " "); // multiple whitespace to one space logger.info("AFTER: Field " + field + ": *" + value + "*"); doc.setField(field, value); } } } return doc;}
Split the values in a field
Copy
//Split On a delimiter. In this case, a newlinefunction(doc){ if (doc.getId() !== null) { var fromField = "company2_ss"; var toField = "company2_ss"; var delimiter = "\n"; var oldList = doc.getFieldValues(fromField); var values = []; // parse the entries one at a time doc.removeFields(toField); // clear out the target field for (i = 0; i < oldList.size(); i++) { values[i] = oldList.get(i); // get the list of strings split by the delimiter newList = values[i].split(delimiter); for(j = 0; j < newList.length; j++ ){ doc.addField(toField, newList[j]); } } } return doc;}
When entering configuration values in the UI, use unescaped characters, such as \t for the tab character. When entering configuration values in the API, use escaped characters, such as \\t for the tab character.