Javascript Index Stage

For a complete description of the Javascript Index stage and additional examples, see: Custom JavaScript Stages For Index Pipelines.

Examples

Drop a document by ID
function(doc) {
  var id = doc.getId();
  if (id !== null) {
    var pattern = "https://www.mydomain.com/links/contact/?";

    // 0 means the pattern was found so drop the doc
    return (id.indexOf(pattern) == 0) ? null : doc;
  }

  return doc;
}
Format Date to Solr Date
// For example:
// From: 26/Mar/2015:14:38:48 -0700
// To:   2015-03-26T14:38:48Z  (Solr format)
function(doc) {
  if (doc.getId() !== null) {
    var inboundPattern = "dd/MMM/yyyy':'HH:mm:ss Z";  // modify this to match the format of the inbound date
    var solrDatePattern = "yyyy-MM-dd'T'HH:mm:ss'Z'"; // leave this alone
    var dateFieldName = "apachelogtime"; // change this to your date field name

    var solrFormatter = new java.text.SimpleDateFormat(solrDatePattern);
    var apacheParser = new java.text.SimpleDateFormat(inboundPattern);

    var dateString = doc.getFirstFieldValue(dateFieldName);
    logger.info("**** dateString: " + dateString);
    var inboundDate = apacheParser.parse(dateString);
    logger.info("**** inboundDate: " + inboundDate.toString());
    var solrDate =  solrFormatter.format(inboundDate);
    logger.info("**** solrDate: " + solrDate.toString());

    doc.setField(dateFieldName, solrDate.toString());
  }

  return doc;
}
Replace whitespace and newlines
function(doc) {
  if (doc.getId() !== null) {
    var fields = ["col1", "col2", "col3"];

    for (i = 0; i < fields.length; i++ ) {
      var field = fields[i];
      var value = doc.getFirstFieldValue(field);
      logger.info("BEFORE: Field " + field + ": *" + value + "*");

      if (value != null) {
        value = value.replace(/^\s+/, "");  // remove leading whitespace
        logger.info("AFTER: Field " + field + ": *" + value + "*");
        value = value.replace(/\s+$/, "");  // remove trailing whitespace
        logger.info("AFTER: Field " + field + ": *" + value + "*");
        value = value.replace(/\s+/g, " ");  // multiple whitespace to one space
        logger.info("AFTER: Field " + field + ": *" + value + "*");

        doc.setField(field, value);
      }
    }
  }

  return doc;
}
Split the values in a field
//Split On a delimiter. In this case, a newline
function(doc){
  if (doc.getId() !== null) {
    var fromField = "company2_ss";
    var toField = "company2_ss";
    var delimiter = "\n";

    var oldList = doc.getFieldValues(fromField);

    var values = [];

    // parse the entries one at a time
    doc.removeFields(toField); // clear out the target field
    for (i = 0; i < oldList.size(); i++) {
      values[i] = oldList.get(i);

      // get the list of strings split by the delimiter
      newList = values[i].split(delimiter);
      for(j = 0; j < newList.length; j++ ){
        doc.addField(toField, newList[j]);
      }
    }
  }
  return doc;
}

Configuration

Tip
When entering configuration values in the UI, use unescaped characters, such as \t for the tab character. When entering configuration values in the API, use escaped characters, such as \\t for the tab character.