Ad

Reorder Lines In A String Based On First Two Numbers Inside

- 1 answer

I'm getting some string data from an irrelevant source, which contains (among other text) lines that follow a pattern similar to ^INSERT INTO \w VALUES (\d, .*$. What I want to do is to sort these lines based on the first (and if second is numeric, then also the second) column's value.

The code I have so far works fine for lines that have a strict increment in the first value, but when the values are duplicated, the logic breaks and undefined is output instead. I'd also like to know how I could sort based on the second number in the event of a duplication in the first one.

Sorting by multiple properties in itself would not be an issue for me, the problem is with the way my objects are set up (they don't allow for duplicate IDs because of the use of 1 object key per ID).

var data = 
"--\n\
-- PostgreSQL database dump\n\
--\n\
\n\
SET statement_timeout = 0;\n\
SET lock_timeout = 0;\n\
SET client_encoding = 'UTF8';\n\
SET standard_conforming_strings = on;\n\
SET check_function_bodies = false;\n\
SET client_min_messages = warning;\n\
\n\
-- Working fine\n\
INSERT INTO c VALUES (3, 'c', 45);\n\
INSERT INTO c VALUES (1, 'a', 11);\n\
INSERT INTO c VALUES (5, 'e', 77);\n\
INSERT INTO c VALUES (4, 'd', 76);\n\
INSERT INTO c VALUES (2, 'b', 33);\n\
\n\
--\n\
-- Name: a; Type: TABLE; Schema: public\n\
--\n\
CREATE TABLE a (\n\
    first integer NOT NULL,\n\
    second integer NOT NULL\n\
);\n\
\n\
--\n\
-- Name: a_first_second; Type: CONSTRAINT; Schema: public\n\
--\n\
ALTER TABLE ONLY a\n\
    ADD CONSTRAINT a_first_second PRIMARY KEY (first, second);\n\
\n\
-- Breaks\n\
INSERT INTO a VALUES (1, 1);\n\
INSERT INTO a VALUES (2, 5);\n\
INSERT INTO a VALUES (4, 2);\n\
INSERT INTO a VALUES (5, 6);\n\
INSERT INTO a VALUES (2, 4);\n\
INSERT INTO a VALUES (3, 7);\n\
INSERT INTO a VALUES (4, 6);\n\
INSERT INTO a VALUES (2, 7);\n\
INSERT INTO a VALUES (6, 9);\n\
\n\
-- All good here\n\
INSERT INTO b VALUES (2, 'b', 'Description');\n\
INSERT INTO b VALUES (3, 'c', 'Description\n\
2nd line of description');\n\
INSERT INTO b VALUES (1, 'a', 'Description');\n\
INSERT INTO b VALUES (4, 'd', 'Description\n\
2nd line of description\n\
3rd line of description');\n\
INSERT INTO b VALUES (5, 'e', 'Description');";
/*/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////*/document.body.innerHTML='<code><pre></pre></code>';console = {log:function(input){document.getElementsByTagName('pre')[0].innerHTML+=input+'\n'}};
var groups = {},
    test = /INSERT INTO "?([a-z]+)"?\s*VALUES\s*\((\d+),[\s\S]+?;/g;
data.replace(test,function(row,group,field){
  if (typeof groups[group] !== 'object')
    groups[group] = {};
  groups[group][field] = row;
  return row;
});
var sortedGroupKeys = {},
    groupStep = {};
for (var j = 0, k = Object.keys(groups), l = k.length; j<l; j++){
  var group = k[j];
  sortedGroupKeys[group] = Object.keys(groups[group]).sort(function(a,b){
      return parseInt(a, 10) - parseInt(b, 10);
  });
  groupStep[group] = 0;
}
data = data.replace(test,function(row,group){
  var nextSortedKeyIndex = groupStep[group]++,
      nextSortedKey = sortedGroupKeys[group][nextSortedKeyIndex];

  return groups[group][nextSortedKey];
});
console.log(data);

Ad

Answer

Building on @Shaunt's suggestion, I changed the script to use an array of complete statements stored on a per-table basis, which I then parse separately inside a custom sort function.

var parseRow = function(r){
      var match = r.match(/VALUES \((\d+)(?:, (\d+|NULL))?[, )]/);
      if (!match)
        return [];
      return [match[1], match[2]];
    }, 
    test = /INSERT INTO "?([a-z]+)"?\s*VALUES\s*\((\d+),[\s\S]+?;/g,
    Tables = {},
    TableCounters = {};

data.replace(test,function(row,table){
  if (typeof Tables[table] !== 'object')
    Tables[table] = [];
  Tables[table].push(row);
  TableCounters[table] = 0;
  return row;
});
for (var j = 0, k = Object.keys(Tables), l = k.length; j<l; j++){
  var table = k[j];
  Tables[table].sort(function(a,b){
    a = parseRow(a);
    b = parseRow(b);

    var ix = 0;
    if (a[0] === b[0] && a[1] !== 'NULL' && b[1] !== 'NULL')
      ix++;

    a[ix] = parseInt(a[ix], 10);
    b[ix] = parseInt(b[ix], 10);

    return a[ix] > b[ix] ? 1 : (a[ix] < b[ix] ? -1 : 0);
  })
}
data = data.replace(test,function(row,table){
  var nextRowIndex = TableCounters[table]++;

  return Tables[table][nextRowIndex];
});

Working example:

var data = 
"--\n\
-- PostgreSQL database dump\n\
--\n\
\n\
SET statement_timeout = 0;\n\
SET lock_timeout = 0;\n\
SET client_encoding = 'UTF8';\n\
SET standard_conforming_strings = on;\n\
SET check_function_bodies = false;\n\
SET client_min_messages = warning;\n\
\n\
-- Working fine\n\
INSERT INTO c VALUES (3, 'c', 45);\n\
INSERT INTO c VALUES (1, 'a', 11);\n\
INSERT INTO c VALUES (5, 'e', 77);\n\
INSERT INTO c VALUES (4, 'd', 76);\n\
INSERT INTO c VALUES (2, 'b', 33);\n\
\n\
--\n\
-- Name: a; Type: TABLE; Schema: public\n\
--\n\
CREATE TABLE a (\n\
    first integer NOT NULL,\n\
    second integer NOT NULL\n\
);\n\
\n\
--\n\
-- Name: a_first_second; Type: CONSTRAINT; Schema: public\n\
--\n\
ALTER TABLE ONLY a\n\
    ADD CONSTRAINT a_first_second PRIMARY KEY (first, second);\n\
\n\
-- Breaks\n\
INSERT INTO a VALUES (1, 1);\n\
INSERT INTO a VALUES (2, 5);\n\
INSERT INTO a VALUES (4, 2);\n\
INSERT INTO a VALUES (5, 6);\n\
INSERT INTO a VALUES (2, 4);\n\
INSERT INTO a VALUES (3, 7);\n\
INSERT INTO a VALUES (4, 6);\n\
INSERT INTO a VALUES (2, 7);\n\
INSERT INTO a VALUES (6, 9);\n\
\n\
-- All good here\n\
INSERT INTO b VALUES (2, 'b', 'Description');\n\
INSERT INTO b VALUES (3, 'c', 'Description\n\
2nd line of description');\n\
INSERT INTO b VALUES (1, 'a', 'Description');\n\
INSERT INTO b VALUES (4, 'd', 'Description\n\
2nd line of description\n\
3rd line of description');\n\
INSERT INTO b VALUES (5, 'e', 'Description');";
/*//////////////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////*/document.body.innerHTML='<code><pre></pre></code>';console = {log:function(input){document.getElementsByTagName('pre')[0].innerHTML+=input+'\n'}};
var parseRow = function(r){
      var match = r.match(/VALUES \((\d+)(?:, (\d+|NULL))?[, )]/);
      if (!match)
        return [];
      return [match[1], match[2]];
    }, 
    test = /INSERT INTO "?([a-z]+)"?\s*VALUES\s*\((\d+),[\s\S]+?;/g,
    Tables = {},
    TableCounters = {};

data.replace(test,function(row,table){
  if (typeof Tables[table] !== 'object')
    Tables[table] = [];
  Tables[table].push(row);
  TableCounters[table] = 0;
  return row;
});
for (var j = 0, k = Object.keys(Tables), l = k.length; j<l; j++){
  var table = k[j];
  Tables[table].sort(function(a,b){
    a = parseRow(a);
    b = parseRow(b);

    var ix = 0;
    if (a[0] === b[0] && a[1] !== 'NULL' && b[1] !== 'NULL')
      ix++;

    a[ix] = parseInt(a[ix], 10);
    b[ix] = parseInt(b[ix], 10);

    return a[ix] > b[ix] ? 1 : (a[ix] < b[ix] ? -1 : 0);
  })
}
data = data.replace(test,function(row,table){
  var nextRowIndex = TableCounters[table]++;

  return Tables[table][nextRowIndex];
});
console.log(data);

Ad
source: stackoverflow.com
Ad